############# GOVERNMENT 2001 FINAL PAPER REPLICATION CODE #############
############### NINA MCMURRY AND JONATHAN WEIGEL #######################
#########################################################################

library(foreign)
library(ggplot2)
library(mvtnorm)
library(sandwich)
library(lmtest)


### Loading mclx function for multiple cluster robust standard errors
### Throughout this uses double clustered standard errors (clustering on country and ethno-linguistic family)

mclx <- 
  function(fm, dfcw, cluster1, cluster2){
    cluster12 = paste(cluster1,cluster2, sep="")
    M1  <- length(unique(cluster1))
    M2  <- length(unique(cluster2))   
    M12 <- length(unique(cluster12))
    N   <- length(cluster1)          
    K   <- fm$rank             
    dfc1  <- (M1/(M1-1))*((N-1)/(N-K))  
    dfc2  <- (M2/(M2-1))*((N-1)/(N-K))  
    dfc12 <- (M12/(M12-1))*((N-1)/(N-K))  
    u1j   <- apply(estfun(fm), 2, function(x) tapply(x, cluster1,  sum)) 
    u2j   <- apply(estfun(fm), 2, function(x) tapply(x, cluster2,  sum)) 
    u12j  <- apply(estfun(fm), 2, function(x) tapply(x, cluster12, sum))
    vc1   <-  dfc1*sandwich(fm, meat=crossprod(u1j)/N )
    vc2   <-  dfc2*sandwich(fm, meat=crossprod(u2j)/N )
    vc12  <- dfc12*sandwich(fm, meat=crossprod(u12j)/N)
    vcovMCL <- (vc1 + vc2 - vc12)*dfcw
    coeftest(fm, vcovMCL)}


########################################################## FIGURE 1 #########################################################
############################################## HOUSEHOLD WEALTH AND LUMINOSITY WITHIN COUNTRIES ############################

### Loading DHS data from Michalopoulos and Papainou

drc<-read.dta("DHS_DRC2007-final.dta")

head(drc)

nga<-read.dta("DHS_NGA2008-final.dta")
tza<-read.dta("DHS_TZA2007-final.dta")
zwe<-read.dta("DHS_ZWE2005-final.dta")

#### Figure 2A: Tanzania ####

### Regressing wealth and lights on population density

tza.y<-lm(tza$wealthhnum~tza$lnpd0)
tza.x<-lm(tza$lnlights0708~tza$lnpd0)

### Storing the residuals

tza$tzax<-tza.x$resid
tza$tzay<-tza.y$resid

### Creating the plot

tza.plot2<-ggplot(tza,aes(x=tzax,y=tzay))+geom_point(color="peru")+geom_smooth(method="lm",formula=y~x,color="steelblue4")+ggtitle(expression(atop("Household Wealth and Light Density: DHS Clusters in Tanzania",atop("Conditional on Population Density in 2000",""))))+xlab("Log Light Density in a 10 km Radius of a DHS Cluster")+ylab("Household Wealth Index of a DHS Cluster")


#### Figure 2B: Zimbabwe ####

### Regressing wealth and lights on population density

zwe.y<-lm(zwe$wealthhnum~zwe$lnpd0)
zwe.x<-lm(zwe$lnlights0708~zwe$lnpd0)

### Storing the residuals

zwe$zwex<-zwe.x$resid
zwe$zwey<-zwe.y$resid

### Creating the plot

zwe.plot2<-ggplot(zwe,aes(x=zwex,y=zwey))+geom_point(color="peru")+geom_smooth(method="lm",formula=y~x,color="steelblue4")+ggtitle(expression(atop("Household Wealth and Light Density: DHS Clusters in Zimbabwe",atop("Conditional on Population Density in 2000",""))))+xlab("Log Light Density in a 10 km Radius of a DHS Cluster")+ylab("Household Wealth Index of a DHS Cluster")

### Figure 2C: DRC ####

### Regressing wealth and lights on population density

drc.y<-lm(drc$wealthhnum~drc$lnpd0)
drc.x<-lm(drc$lnlights0708~drc$lnpd0)

### Storing the residuals

drc$drcx<-drc.x$resid
drc$drcy<-drc.y$resid

### Creating the plot

drc.plot2<-ggplot(drc,aes(x=drcx,y=drcy))+geom_point(color="peru")+geom_smooth(method="lm",formula=y~x,color="steelblue4")+ggtitle(expression(atop("Household Wealth and Light Density: DHS Clusters in Congo DRC",atop("Conditional on Population Density in 2000",""))))+xlab("Log Light Density in a 10 km Radius of a DHS Cluster")+ylab("Household Wealth Index of a DHS Cluster")

### Figure 2D: Nigeria ####

### Removing missing data for wealth

nga<-nga[-which(is.na(nga$wealthhnum)),]

### Regressing wealth and lights on population density

nga.y<-lm(nga$wealthhnum~nga$lnpd0)
nga.x<-lm(nga$lnlights0708~nga$lnpd0)

### Storing the residuals

nga$ngax<-nga.x$resid
nga$ngay<-nga.y$resid

### Plotting

nga.plot2<-ggplot(nga,aes(x=ngax,y=ngay))+geom_point(color="peru")+geom_smooth(method="lm",formula=y~x,color="steelblue4")+ggtitle(expression(atop("Household Wealth and Light Density: DHS Clusters in Nigeria",atop("Conditional on Population Density in 2000",""))))+xlab("Log Light Density in a 10 km Radius of a DHS Cluster")+ylab("Household Wealth Index of a DHS Cluster")

### Plotting all four on one image

plot<-grid.arrange(tza.plot2,zwe.plot2,drc.plot2,nga.plot2,nrow=2,ncol=2,main=textGrob("Household wealth and luminosity within countries"))

##########################################################TABLE 1###########################################################
################################################SUMMARY STATISTICS########################################################


##LOAD MAIN DATASET##

data <- read.dta("ethnicity-country-data-final.dta")

head(data)
table(data$name)

data.cam<-data[which(data$wbcountry=="CAMEROON"),]

data.cam$name

##LOAD DATA FOR PIXEL LEVEL ANALYSIS##

data1 <- read.dta("pixel-level-baseline-final.dta")

####Creating variables####

head(data1)

##Night lights variables
mean_lights0708<-data$mean_lights0708 
lnlights_nozeros<-data$lnlights_nozeros 
lnlights0708<-data$lnlights_nozeros
l0708<-data1$l0708
l0708d<-data1$l0708d
lnl0708s<-data1$lnl0708s

##Murdoch variables
centr_tribe<-data$centr_tribe
centr_tribe2<-data1$centr_tribe
country<-data$wbcountry
ethnicity<-data$cluster
gr<-data$gr

#location controls
capdistance1<-data$capdistance1
seadist1<-data$seadist1
borderdist1<-data$borderdist1

#Geographic controls
lnwaterkm<-data$lnwaterkm
lnkm2split<-data$lnkm2split
mean_elev<-data$mean_elev
mean_suit<-data$mean_suit
malariasuit<-data$malariasuit
petroleum<-data$petroleum
diamond<-data$diamond

##Other controls
lnpdmean00s<-data$lnpdmean00s
rlaw07<-data$rlaw07
lnrgdpch07<-data$lnrgdpch07
obs<-data$obs


##############Panel A - All obs#################

panel_a<-matrix(data=NA, nrow=4, ncol=8)

#Light density
panel_a[1,1:3]<-c(length(mean_lights0708), round(mean(mean_lights0708), 3), round(sqrt(var(mean_lights0708)),3))
panel_a[1,4:6]<-round(quantile(mean_lights0708, probs=c(.25,.5,.75)), 3)
panel_a[1,7:8]<-c(min(mean_lights0708), round(max(mean_lights0708),3))

#Log(0.01 + light density)
panel_a[2,1:3]<-c(length(lnlights0708), round(mean(lnlights0708), 3), round(sqrt(var(lnlights0708)),3))
panel_a[2,4:6]<-round(quantile(lnlights0708, probs=c(.25,.5,.75)), 3)
panel_a[2,7:8]<-c(round(min(lnlights0708),3), round(max(lnlights0708),3))

#Pixel level light density
panel_a[3,1:3]<-c(length(l0708), round(mean(l0708), 3), round(sqrt(var(l0708)),3))
panel_a[3,4:6]<-round(quantile(l0708, probs=c(.25,.5,.75)), 3)
panel_a[3,7:8]<-c(round(min(l0708),3), round(max(l0708),3))

#Lit pixel

panel_a[4,1:3]<-c(length(l0708d), round(mean(l0708d), 3), round(sqrt(var(l0708d)),3))
panel_a[4,4:6]<-round(quantile(l0708d, probs=c(.25,.5,.75)), 3)
panel_a[4,7:8]<-c(round(min(l0708d),3), round(max(l0708d),3))

colnames(panel_a)<-c("Obs.", "Mean", "St. Dev.", "p25", "Median", "p75", "Min", "Max")
rownames(panel_a)<-c("Light density", "Log(0.01 + light density)", "Pixel-level light density", "Lit pixel")

#################Panel B: Stateless Ethnicities##################

panel_b<-matrix(data=NA, nrow=4, ncol=8)

#Light density
panel_b[1,1:3]<-c(sum(!is.na(mean_lights0708[centr_tribe==0])), round(mean(mean_lights0708[centr_tribe==0],na.rm=TRUE), 3), round(sqrt(var(mean_lights0708[centr_tribe==0])),3))
panel_b[1,4:6]<-round(quantile(mean_lights0708[centr_tribe==0], probs=c(.25,.5,.75)), 3)
panel_b[1,7:8]<-c(min(mean_lights0708[centr_tribe==0], na.rm=TRUE), round(max(mean_lights0708[centr_tribe==0], na.rm=TRUE),3))

#ln(0.01+ Light Density)
panel_b[2,1:3]<-c(sum(!is.na(lnlights0708[centr_tribe==0])), round(mean(lnlights0708[centr_tribe==0],na.rm=TRUE), 3), round(sqrt(var(lnlights0708[centr_tribe==0])),3))
panel_b[2,4:6]<-round(quantile(lnlights0708[centr_tribe==0], probs=c(.25,.5,.75)), 3)
panel_b[2,7:8]<-c(round(min(lnlights0708[centr_tribe==0], na.rm=TRUE),3), round(max(lnlights0708[centr_tribe==0], na.rm=TRUE),3))

#Pixel level light density
panel_b[3,1:3]<-c(sum(!is.na(l0708[centr_tribe2==0])), round(mean(l0708[centr_tribe2==0],na.rm=TRUE), 3), round(sqrt(var(l0708[centr_tribe2==0])),3))
panel_b[3,4:6]<-round(quantile(l0708[centr_tribe2==0], probs=c(.25,.5,.75)), 3)
panel_b[3,7:8]<-c(round(min(l0708[centr_tribe2==0], na.rm=TRUE),3), round(max(l0708[centr_tribe2==0], na.rm=TRUE),3))

#Lit pixel
panel_b[4,1:3]<-c(sum(!is.na(l0708d[centr_tribe2==0])), round(mean(l0708d[centr_tribe2==0],na.rm=TRUE), 3), round(sqrt(var(l0708d[centr_tribe2==0])),3))
panel_b[4,4:6]<-round(quantile(l0708d[centr_tribe2==0], probs=c(.25,.5,.75)), 3)
panel_b[4,7:8]<-c(round(min(l0708d[centr_tribe2==0], na.rm=TRUE),3), round(max(l0708d[centr_tribe2==0], na.rm=TRUE),3))

rownames(panel_b)<-c("Light density1", "Log(0.01 + light density)1", "Pixel-level light density1", "Lit pixel1")

##############################Panel C: Petty Chiefdoms#############################

panel_c<-matrix(data=NA, nrow=4, ncol=8)

#Light density

panel_c[1,1:3]<-c(sum(!is.na(mean_lights0708[centr_tribe==1])), round(mean(mean_lights0708[centr_tribe==1],na.rm=TRUE), 3), round(sqrt(var(mean_lights0708[centr_tribe==1])),3))
panel_c[1,4:6]<-round(quantile(mean_lights0708[centr_tribe==1], probs=c(.25,.5,.75)), 3)
panel_c[1,7:8]<-c(min(mean_lights0708[centr_tribe==1], na.rm=TRUE), round(max(mean_lights0708[centr_tribe==1], na.rm=TRUE),3))

#ln(0.01+ Light Density)

panel_c[2,1:3]<-c(sum(!is.na(lnlights0708[centr_tribe==1])), round(mean(lnlights0708[centr_tribe==1],na.rm=TRUE), 3), round(sqrt(var(lnlights0708[centr_tribe==1])),3))
panel_c[2,4:6]<-round(quantile(lnlights0708[centr_tribe==1], probs=c(.25,.5,.75)), 3)
panel_c[2,7:8]<-c(round(min(lnlights0708[centr_tribe==1], na.rm=TRUE),3), round(max(lnlights0708[centr_tribe==1], na.rm=TRUE),3))

#Pixel level light density

panel_c[3,1:3]<-c(sum(!is.na(l0708[centr_tribe2==1])), round(mean(l0708[centr_tribe2==1],na.rm=TRUE), 3), round(sqrt(var(l0708[centr_tribe2==1])),3))
panel_c[3,4:6]<-round(quantile(l0708[centr_tribe2==1], probs=c(.25,.5,.75)), 3)
panel_c[3,7:8]<-c(round(min(l0708[centr_tribe2==1], na.rm=TRUE),3), round(max(l0708[centr_tribe2==1], na.rm=TRUE),3))

#Lit pixel

panel_c[4,1:3]<-c(sum(!is.na(l0708d[centr_tribe2==1])), round(mean(l0708d[centr_tribe2==1],na.rm=TRUE), 3), round(sqrt(var(l0708d[centr_tribe2==1])),3))
panel_c[4,4:6]<-round(quantile(l0708d[centr_tribe2==1], probs=c(.25,.5,.75)), 3)
panel_c[4,7:8]<-c(round(min(l0708d[centr_tribe2==1], na.rm=TRUE),3), round(max(l0708d[centr_tribe2==1], na.rm=TRUE),3))

rownames(panel_c)<-c("Light density2", "Log(0.01 + light density)2", "Pixel-level light density2", "Lit pixel2")

##################################Panel D: Paramount Chiefdoms###############################

panel_d<-matrix(data=NA, nrow=4, ncol=8)

#Light density

panel_d[1,1:3]<-c(sum(!is.na(mean_lights0708[centr_tribe==2])), round(mean(mean_lights0708[centr_tribe==2],na.rm=TRUE), 3), round(sqrt(var(mean_lights0708[centr_tribe==2])),3))
panel_d[1,4:6]<-round(quantile(mean_lights0708[centr_tribe==2], probs=c(.25,.5,.75)), 3)
panel_d[1,7:8]<-c(min(mean_lights0708[centr_tribe==2], na.rm=TRUE), round(max(mean_lights0708[centr_tribe==2], na.rm=TRUE),3))

#ln(0.01+ Light Density)

panel_d[2,1:3]<-c(sum(!is.na(lnlights0708[centr_tribe==2])), round(mean(lnlights0708[centr_tribe==2],na.rm=TRUE), 3), round(sqrt(var(lnlights0708[centr_tribe==2])),3))
panel_d[2,4:6]<-round(quantile(lnlights0708[centr_tribe==2], probs=c(.25,.5,.75)), 3)
panel_d[2,7:8]<-c(round(min(lnlights0708[centr_tribe==2], na.rm=TRUE),3), round(max(lnlights0708[centr_tribe==2], na.rm=TRUE),3))

#Pixel level light density

panel_d[3,1:3]<-c(sum(!is.na(l0708[centr_tribe2==2])), round(mean(l0708[centr_tribe2==2],na.rm=TRUE), 3), round(sqrt(var(l0708[centr_tribe2==2])),3))
panel_d[3,4:6]<-round(quantile(l0708[centr_tribe2==2], probs=c(.25,.5,.75)), 3)
panel_d[3,7:8]<-c(round(min(l0708[centr_tribe2==2], na.rm=TRUE),3), round(max(l0708[centr_tribe2==2], na.rm=TRUE),3))

#Lit pixel

panel_d[4,1:3]<-c(sum(!is.na(l0708d[centr_tribe2==2])), round(mean(l0708d[centr_tribe2==2],na.rm=TRUE), 3), round(sqrt(var(l0708d[centr_tribe2==2])),3))
panel_d[4,4:6]<-round(quantile(l0708d[centr_tribe2==2], probs=c(.25,.5,.75)), 3)
panel_d[4,7:8]<-c(round(min(l0708d[centr_tribe2==2], na.rm=TRUE),3), round(max(l0708d[centr_tribe2==2], na.rm=TRUE),3))

rownames(panel_d)<-c("Light density3", "Log(0.01 + light density)3", "Pixel-level light density3", "Lit pixel3")

###########################################Panel E: Pre-Colonial States################################

panel_e<-matrix(data=NA, nrow=4, ncol=8)

#Light density

panel_e[1,1:3]<-c(sum(!is.na(mean_lights0708[centr_tribe==3|centr_tribe==4])), round(mean(mean_lights0708[centr_tribe==3|centr_tribe==4],na.rm=TRUE), 3), round(sqrt(var(mean_lights0708[centr_tribe==3|centr_tribe==4])),3))
panel_e[1,4:6]<-round(quantile(mean_lights0708[centr_tribe==3|centr_tribe==4], probs=c(.25,.5,.75)), 3)
panel_e[1,7:8]<-c(min(mean_lights0708[centr_tribe==3|centr_tribe==4], na.rm=TRUE), round(max(mean_lights0708[centr_tribe==3|centr_tribe==4], na.rm=TRUE),3))

#ln(0.01+ Light Density)

panel_e[2,1:3]<-c(sum(!is.na(lnlights0708[centr_tribe==3|centr_tribe==4])), round(mean(lnlights0708[centr_tribe==3|centr_tribe==4],na.rm=TRUE), 3), round(sqrt(var(lnlights0708[centr_tribe==3|centr_tribe==4])),3))
panel_e[2,4:6]<-round(quantile(lnlights0708[centr_tribe==3|centr_tribe==4], probs=c(.25,.5,.75)), 3)
panel_e[2,7:8]<-c(round(min(lnlights0708[centr_tribe==3|centr_tribe==4], na.rm=TRUE),3), round(max(lnlights0708[centr_tribe==3|centr_tribe==4], na.rm=TRUE),3))

#Pixel level light density

panel_e[3,1:3]<-c(sum(!is.na(l0708[centr_tribe2==3|centr_tribe2==4])), round(mean(l0708[centr_tribe2==3|centr_tribe2==4],na.rm=TRUE), 3), round(sqrt(var(l0708[centr_tribe2==3|centr_tribe2==4])),3))
panel_e[3,4:6]<-round(quantile(l0708[centr_tribe2==3|centr_tribe2==4], probs=c(.25,.5,.75)), 3)
panel_e[3,7:8]<-c(round(min(l0708[centr_tribe2==3|centr_tribe2==4], na.rm=TRUE),3), round(max(l0708[centr_tribe2==3|centr_tribe2==4], na.rm=TRUE),3))

#Lit pixel

panel_e[4,1:3]<-c(sum(!is.na(l0708d[centr_tribe2==3|centr_tribe2==4])), round(mean(l0708d[centr_tribe2==3|centr_tribe2==4],na.rm=TRUE), 3), round(sqrt(var(l0708d[centr_tribe2==3|centr_tribe2==4])),3))
panel_e[4,4:6]<-round(quantile(l0708d[centr_tribe2==3|centr_tribe2==4], probs=c(.25,.5,.75)), 3)
panel_e[4,7:8]<-c(round(min(l0708d[centr_tribe2==3|centr_tribe2==4], na.rm=TRUE),3), round(max(l0708d[centr_tribe2==3|centr_tribe2==4], na.rm=TRUE),3))

rownames(panel_e)<-c("Light density4", "Log(0.01 + light density)4", "Pixel-level light density4", "Lit pixel4")

############ Creating the full table##########

tab1<-rbind(panel_a,panel_b,panel_c,panel_d,panel_e)
xtable(tab1,digits=c(0,0,3,3,3,3,3,3,3))




############################################
########DIAGNOSING  MISSPECIFICATION########
############################################

setwd("/Users/jonathanlweigel/Documents/PEG/Spring 2014/Gov 2001/Replication/Paper/Final")
load("data1")
data<-data1
data$petty<-data$centr_tribe==1
data$paramount<-data$centr_tribe==2
data$state<-data$centr_tribe==3 | data$centr_tribe==4

#Subset data so there are no NAs in the pixel related control variables
data1<-na.omit(data)



#############################################################
#########################BINARY DV###########################
#############################################################


###########################################
#######TABLE 2: LPM VS. LOGIT##############
###########################################

#Column 1
model1<-lm(l0708d~centr_tribe, data=data)
summary(model1)
mclx(model1, 1, data$pixwbcode, data$pixcluster)

#Column 2
model2<-lm(l0708d~centr_tribe+lnpd0+factor(data$pixwbcode), data=data)
summary(model2)
mclx(model2, 1, data$pixwbcode, data$pixcluster)

#Column 3
model3<-lm(l0708d~centr_tribe+lnpd0+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixwbcode), data=data1)
summary(model3)
mclx(model3, 1, data1$pixwbcode, data1$pixcluster)

#Column 4
model4<-glm(l0708d~centr_tribe, data=data,family="binomial")
summary(model4)
mclx(model4, 1, data$pixwbcode, data$pixcluster)

#Column 5
model5<-glm(l0708d~centr_tribe+lnpd0+factor(data$pixwbcode), data=data,family="binomial")
summary(model5)
mclx(model5, 1, data$pixwbcode, data$pixcluster)

#Column 6
model6<-glm(l0708d~centr_tribe+lnpd0+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixwbcode), data=data1, family="binomial")
summary(model6)
mclx(model6, 1, data1$pixwbcode, data1$pixcluster)


########################################################## FIGURE 3 #########################################################
############################################## FIRST DIFFERENCES PLOT ###########################################


### Simulating predicted probabilities

### Loading pixel level data

data1 <- read.dta("pixel-level-baseline-final.dta")

### Model 1: Bivariate OLS regression of binary pixel luminosity on jurisdictional hierarchy

### Creating a function for the log likelihood (normal)

ll.norm<-function(par,x,y){
  y<-as.matrix(y)
  x<-as.matrix(x)
  betas<-as.matrix(par[1:ncol(x)])
  sigma2<-exp(par[ncol(x)+1])
  -.5*sum(log(sigma2)+((y-x%*%betas)^2)/sigma2)
}

### Optimizing

par<-rep(0,3)
opt.norm<-optim(par=par,fn=ll.norm,x=cbind(1,data1$centr_tribe),y=data1$l0708d,control=list(fnscale=-1),method="BFGS",hessian=TRUE)

### Generating 10000 parameter vectors

varcov2<--solve(opt.norm$hessian)

library(mvtnorm)
set.seed(8766)
sim.betas2<-rmvnorm(n=10000,mean=opt.norm$par,sigma=varcov2)

### Creating vectors of covariates with different levels of centr_tribe

covs0<-c(1,0)
covs1<-c(1,1)
covs2<-c(1,2)
covs3<-c(1,3)
covs4<-c(1,4)

### Generating mean values using the systematic component

xbetas0<-covs0%*%t(sim.betas2[,-3])
xbetas1<-covs1%*%t(sim.betas2[,-3])
xbetas2<-covs2%*%t(sim.betas2[,-3])
xbetas3<-covs3%*%t(sim.betas2[,-3])
xbetas4<-covs4%*%t(sim.betas2[,-3])

### Transforming re-paramterized sigma2 

sigma2<-exp(sim.betas2[,3])
sigma<-sqrt(sigma2)

### Writing a function to simulate from the stochastic component

pv.fun<-function(mean,sd){
  pvs<-rnorm(n=1,mean=mean,sd=sd)
}

### Simulating from the stochastic component

set.seed(8766)
pvs0<-sapply(X=sigma,FUN=pv.fun,mean=xbetas0)
pvs1<-sapply(X=sigma,FUN=pv.fun,mean=xbetas1)
pvs2<-sapply(X=sigma,FUN=pv.fun,mean=xbetas2)
pvs3<-sapply(X=sigma,FUN=pv.fun,mean=xbetas3)


### FIRST DIFFERENCES

### Writing a function to produce expected values

evnorm.fun<-function(mean,sd){
  sims<-rnorm(n=1000,mean=mean,sd=sd)
  evs<-mean(sims)
}

### Generating expected values for jurisdictional hierarchy levels of 0 and 2

set.seed(8766)
evs0<-sapply(X=sigma,FUN=evnorm.fun,mean=xbetas0)
evs2<-sapply(X=sigma,FUN=evnorm.fun,mean=xbetas2)

### Creating a vector of first differences and taking the mean

fdiff12<-evs2-evs0
fdiffmean12<-mean(fdiff12)

### Calculating 99% confidence intervals

upperci12<-sort(fdiff12)[9900]
lowerci12<-sort(fdiff12)[100]


### Model 2: OLS controlling for population density

### Optimizing

par<-rep(0,4)
opt.norm2<-optim(par=par,fn=ll.norm,x=cbind(1,data1$centr_tribe,data1$lnpd0),y=data1$l0708d,control=list(fnscale=-1),method="BFGS",hessian=TRUE)

### Simulating 10000 parameter vectors

varcov2a<--solve(opt.norm2$hessian)

set.seed(8766)
sim.betas2a<-rmvnorm(n=10000,mean=opt.norm2$par,sigma=varcov2a)

### Creating vectors of covariates with different levels of centr_tribe and pop density at its median

covs0n<-c(1,0,median(data1$lnpd0))
covs1n<-c(1,1,median(data1$lnpd0))
covs2n<-c(1,2,median(data1$lnpd0))
covs3n<-c(1,3,median(data1$lnpd0))
covs4n<-c(1,4,median(data1$lnpd0))

### Generating mean values using the systematic component

xbetas0n<-covs0n%*%t(sim.betas2a[,-4])
xbetas1n<-covs1n%*%t(sim.betas2a[,-4])
xbetas2n<-covs2n%*%t(sim.betas2a[,-4])
xbetas3n<-covs3n%*%t(sim.betas2a[,-4])
xbetas4n<-covs4n%*%t(sim.betas2a[,-4])

### Transforming re-paramterized sigma2 

sigma2a<-exp(sim.betas2a[,4])
sigmaa<-sqrt(sigma2a)

### Simulting the stochastic component

set.seed(8766)
pvs0n<-sapply(X=sigmaa,FUN=pv.fun,mean=xbetas0n)
pvs1n<-sapply(X=sigmaa,FUN=pv.fun,mean=xbetas1n)
pvs2n<-sapply(X=sigmaa,FUN=pv.fun,mean=xbetas2n)
pvs3n<-sapply(X=sigmaa,FUN=pv.fun,mean=xbetas3n)
pvs4n<-sapply(X=sigmaa,FUN=pv.fun,mean=xbetas4n)

### FIRST DIFFERENCES

### Generating expected values for jurisdictional hierarchy levels of 0 and 4

set.seed(8766)
evs0n<-sapply(X=sigmaa,FUN=evnorm.fun,mean=xbetas0n)
evs2n<-sapply(X=sigmaa,FUN=evnorm.fun,mean=xbetas2n)

### Creating a vector of first differences and taking the mean

fdiff22<-evs2n-evs0n
fdiffmean22<-mean(fdiff22)

### Obtaining 99% confidence intervals

upperci22<-sort(fdiff22)[9900]
lowerci22<-sort(fdiff22)[100]


### Model 3: OLS controlling for jurisdictional hierarchy and ethnic group-level controls

### Optimizing

data2<-na.omit(data1)
controls<-cbind(data2$centr_tribe,data2$lnpd0,data2$lnkm,data2$pixpetro,data2$pixdia,data2$pixwaterd,data2$pixcapdist,data2$pixmal,data2$pixsead,data2$pixsuit,data2$pixelev,data2$pixbdist,data2$capdistance1,data2$seadist1,data2$borderdist1,data2$lnwaterkm,data2$lnkm2split,data2$mean_elev,data2$mean_suit,data2$malariasuit,data2$petroleum,data2$diamondd)

par<-rep(0,24)
opt.norm3<-optim(par=par,fn=ll.norm,x=cbind(1,controls),y=data2$l0708d,control=list(fnscale=-1),method="BFGS",hessian=TRUE)

### Simulating 10000 parameter vectors

varcov2b<--solve(opt.norm3$hessian)

set.seed(8766)
sim.betas2b<-rmvnorm(n=10000,mean=opt.norm3$par,sigma=varcov2b)

### Creating a vector of covariates with different levels of centr_tribe and all other variables held at their medians

controls3<-controls[,-1]
ncol(controls3)

meds<-c()
for(i in 1:ncol(controls3)){
  meds[i]<-median(controls3[,i])  
}

covs0n2<-c(1,0,meds)
covs1n2<-c(1,1,meds)
covs2n2<-c(1,2,meds)
covs3n2<-c(1,3,meds)
covs4n2<-c(1,4,meds)

### Generating mean values using the systematic component

xbetas0n2<-covs0n2%*%t(sim.betas2b[,-4])
xbetas1n2<-covs1n2%*%t(sim.betas2b[,-4])
xbetas2n2<-covs2n2%*%t(sim.betas2b[,-4])
xbetas3n2<-covs3n2%*%t(sim.betas2b[,-4])
xbetas4n2<-covs4n2%*%t(sim.betas2b[,-4])

### Transforming re-paramterized sigma2 

sigma2b<-exp(sim.betas2b[,4])
sigmab<-sqrt(sigma2b)

### Simulting the stochastic component

set.seed(8766)
pvs0n2<-sapply(X=sigmab,FUN=pv.fun,mean=xbetas0n2)
pvs1n2<-sapply(X=sigmab,FUN=pv.fun,mean=xbetas1n2)
pvs2n2<-sapply(X=sigmab,FUN=pv.fun,mean=xbetas2n2)
pvs3n2<-sapply(X=sigmab,FUN=pv.fun,mean=xbetas3n2)
pvs4n2<-sapply(X=sigmab,FUN=pv.fun,mean=xbetas4n2)

### FIRST DIFFERENCES

### Generating expected values for jurisdictional hierarchy levels of 0 and 2

set.seed(8766)
evs0n2<-sapply(X=sigmab,FUN=evnorm.fun,mean=xbetas0n2)
evs2n2<-sapply(X=sigmab,FUN=evnorm.fun,mean=xbetas2n2)

### Creating a vector of first differences and taking the means

fdiff32<-evs2n2-evs0n2
fdiffmean32<-mean(fdiff32)

upperci32<-sort(fdiff32)[9900]
lowerci32<-sort(fdiff32)[100]


### Repeating with logit specifications

## Logit maximum likelihood

ll.logit<-function(par,y,covs){
  if(!all(covs[,1] == 1)){
    covs <- as.matrix(cbind(1,covs))
  }
  xb <- covs %*% par
  sum(y * xb - xb - log(1 + exp(-xb)))
}

### Model 4: Bivariate logit regression of binary pixel luminosity on jurisdictional hierarchy

par<-rep(0,2)
opt.logit<-optim(par=par,fn=ll.logit,covs=as.matrix(data1$centr_tribe),y=data1$l0708d,control=list(fnscale=-1),method="BFGS",hessian=TRUE)

### Generating 1000 parameter vectors

hist(data1$centr_tribe)

varcov3<--solve(opt.logit$hessian)

library(mvtnorm)
set.seed(8766)
sim.betas3<-rmvnorm(n=10000,mean=opt.logit$par,sigma=varcov3)

### Creating vectors of covariates with different levels of centr_tribe

covs0l<-c(1,0)
covs1l<-c(1,1)
covs2l<-c(1,2)
covs3l<-c(1,3)
covs4l<-c(1,4)

### Generating predicted probabilities using simulated parameters and the link function

pvs.0l<-1/(1+exp(-(covs0l)%*%t(sim.betas3)))
pvs.1l<-1/(1+exp(-(covs1l)%*%t(sim.betas3)))
pvs.2l<-1/(1+exp(-(covs2l)%*%t(sim.betas3)))
pvs.3l<-1/(1+exp(-(covs3l)%*%t(sim.betas3)))
pvs.4l<-1/(1+exp(-(covs4l)%*%t(sim.betas3)))


### FIRST DIFFERENCES

### Writing a function to produce expected values

evlog.fun<-function(x){
  sims<-rbinom(n=1000,size=1,prob=x)
  evs<-mean(sims)
}

### Generating expected values for jurisdictional hierarchy levels of 0 and 2

set.seed(8766)
evs0l<-sapply(X=pvs.0l,FUN=evlog.fun)
evs2l<-sapply(X=pvs.2l,FUN=evlog.fun)

### Creating a vector of first differences and taking the mean

fdiff42<-evs2l-evs0l
fdiffmean42<-mean(fdiff42)

upperci42<-sort(fdiff42)[9900]
lowerci42<-sort(fdiff42)[100]

### Model 5: controlling for population density

par<-rep(0,3)
opt.logit2<-optim(par=par,fn=ll.logit,covs=cbind(data1$centr_tribe,data1$lnpd0),y=data1$l0708d,control=list(fnscale=-1),method="BFGS",hessian=TRUE)

### Generating 10000 parameter vectors

varcov4<--solve(opt.logit2$hessian)

sim.betas4<-rmvnorm(n=10000,mean=opt.logit2$par,sigma=varcov4)

### Creating a vector of covariates with different levels of centr_tribe and population density held at its median

covs0l2<-c(1,0,median(data1$lnpd0))
covs1l2<-c(1,1,median(data1$lnpd0))
covs2l2<-c(1,2,median(data1$lnpd0))
covs3l2<-c(1,3,median(data1$lnpd0))
covs4l2<-c(1,4,median(data1$lnpd0))

### Generating predicted probabilities

pvs.0l2<-1/(1+exp(-(covs0l2)%*%t(sim.betas4)))
pvs.1l2<-1/(1+exp(-(covs1l2)%*%t(sim.betas4)))
pvs.2l2<-1/(1+exp(-(covs2l2)%*%t(sim.betas4)))
pvs.3l2<-1/(1+exp(-(covs3l2)%*%t(sim.betas4)))
pvs.4l2<-1/(1+exp(-(covs4l2)%*%t(sim.betas4)))

### FIRST DIFFERENCES

### Generating expected values for jurisdictional hierarchy levels of 0 and 2

set.seed(8766)
evs0l2<-sapply(X=pvs.0l2,FUN=evlog.fun)
evs2l2<-sapply(X=pvs.2l2,FUN=evlog.fun)

### Creating a vector of first differences and taking the mean


fdiff52<-evs1l2-evs0l2
fdiffmean52<-mean(fdiff52)

upperci52<-sort(fdiff52)[9900]
lowerci52<-sort(fdiff52)[100]


### Model 6: controlling for population density and ethnic group-level controls 

par<-rep(0,23)
opt.logit3<-optim(par=par,fn=ll.logit,covs=controls,y=data2$l0708d,control=list(fnscale=-1),method="BFGS",hessian=TRUE)

### Simulating 10000 parameter values

varcov5<--solve(opt.logit3$hessian)

set.seed(8766)
sim.betas5<-rmvnorm(n=10000,mean=opt.logit3$par,sigma=varcov5)

### Creating a vector of covariates with different levels of centr_tribe

covs0l3<-c(1,0,meds)
covs1l3<-c(1,1,meds)
covs2l3<-c(1,2,meds)
covs3l3<-c(1,3,meds)
covs4l3<-c(1,4,meds)

### Generating predicted probabilities

pvs.0l3<-1/(1+exp(-(covs0l3)%*%t(sim.betas5)))
pvs.1l3<-1/(1+exp(-(covs1l3)%*%t(sim.betas5)))
pvs.2l3<-1/(1+exp(-(covs2l3)%*%t(sim.betas5)))
pvs.3l3<-1/(1+exp(-(covs3l3)%*%t(sim.betas5)))
pvs.4l3<-1/(1+exp(-(covs4l3)%*%t(sim.betas5)))


### FIRST DIFFERENCES

### Generating expected values for jurisdictional hierarchy levels of 0 and 2

set.seed(8766)
evs0l3<-sapply(X=pvs.0l3,FUN=evlog.fun)
evs2l3<-sapply(X=pvs.2l3,FUN=evlog.fun)

### Creating a vector of first differences and taking the means (0 vs. 2)

### Taking the first differences between 0 and 2

fdiff62<-evs2l3-evs0l3
fdiffmean62<-mean(fdiff62)

upperci62<-sort(fdiff62)[9900]
lowerci62<-sort(fdiff62)[100]

fdiffmean62
upperci62
lowerci62

### Compiling first differences and CIs int oa table (0 to 2)

firstdiffmeans2<-c(fdiffmean12,fdiffmean22,fdiffmean32,fdiffmean42,fdiffmean52,fdiffmean62)
firstdiff.upperci992<-c(upperci12,upperci22,upperci32,upperci42,upperci52,upperci62)
firstdiff.lowerci992<-c(lowerci2,lowerci22,lowerci32,lowerci42,lowerci52,lowerci62)

firstdiffs02<-cbind(c(1:6),firstdiffmeans2,firstdiff.upperci992,firstdiff.lowerci992)
rownames(firstdiffs02)<-c("Model 1 (OLS)","Model 2 (OLS)","Model 3 (OLS)","Model 4 (logit)","Model 5 (logit)","Model 6 (logit)")
colnames(firstdiffs02)<-c("Mod","F.diff (centr_tribe 2 vs. 0)","Upper","Lower")

firstdiffs02<-as.data.frame(firstdiffs02)
firstdiffs02

### Plotting first differences

index=c(1:6)

plot(x=firstdiffmeans,y=index,xlim=c(-0.07,0.2),main="First Differences (Stateless Ethnicities vs Paramount Chiefdoms)",xlab="Difference in Probability of Lit Pixel",ylab="Model",cex.main=0.8,pch=16)
abline(v=0)
segments(lowerci12,1,upperci12,1,lwd=2,col="blue")
segments(lowerci22,2,upperci22,2,lwd=2,col="blue")
segments(lowerci32,3,upperci32,3,lwd=2,col="blue")
segments(lowerci42,4,upperci42,4,lwd=2,col="red")
segments(lowerci52,5,upperci52,5,lwd=2,col="red")
segments(lowerci62,6,upperci62,6,lwd=2,col="red")

legend("topleft",c("Logit","OLS"),col=c("red","blue"),lwd=c(2,2),bty="n",cex=0.9)



######################
#####Table 3: OVB#####
######################

#Column 1
model1<-lm(l0708d~centr_tribe+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixwbcode), data=data1)
summary(model1)
mclx(model1, 1, data1$pixwbcode, data1$pixcluster)

#Column 2
model2<-lm(l0708d~centr_tribe+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster), data=data1)
summary(model2)
mclx(model2, 1, data1$pixwbcode, data1$pixcluster)

#Column 3
model3<-lm(l0708d~centr_tribe+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1)
summary(model3)
mclx(model3, 1, data1$pixwbcode, data1$pixcluster)

#Column 4
model4<-lm(l0708d~centr_tribe+lnpd0+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1)
summary(model4)
mclx(model4, 1, data1$pixwbcode, data1$pixcluster)

#Column 5
model5<-glm(l0708d~centr_tribe+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixwbcode), data=data1, family="binomial")
summary(model5)
mclx(model5, 1, data1$pixwbcode, data1$pixcluster)

#Column 6
model6<-glm(l0708d~centr_tribe+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster), data=data1, family="binomial")
summary(model6)
mclx(model6, 1, data1$pixwbcode, data1$pixcluster)

#Column 7
model7<-glm(l0708d~centr_tribe+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1, family="binomial")
summary(model7)
mclx(model7, 1, data1$pixwbcode, data1$pixcluster)

#Column 8
model8<-glm(l0708d~centr_tribe+lnpd0+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1, family="binomial")
summary(model8)
mclx(model8, 1, data1$pixwbcode, data1$pixcluster)

#########################
#####Table 4: OVB 2######
#########################


#Column 1
model1<-lm(l0708d~centr_tribe+setpatt+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixwbcode), data=data1)
summary(model1)
mclx(model1, 1, data1$pixwbcode, data1$pixcluster)

#Column 2
model2<-lm(l0708d~centr_tribe+setpatt+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster), data=data1)
summary(model2)
mclx(model2, 1, data1$pixwbcode, data1$pixcluster)

#Column 3
model3<-lm(l0708d~centr_tribe+setpatt+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1)
summary(model3)
mclx(model3, 1, data1$pixwbcode, data1$pixcluster)

#Column 4
model4<-lm(l0708d~centr_tribe+setpatt+lnpd0+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1)
summary(model4)
mclx(model4, 1, data1$pixwbcode, data1$pixcluster)

#Column 5
model5<-glm(l0708d~centr_tribe+setpatt+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixwbcode), data=data1, family="binomial")
summary(model5)
mclx(model5, 1, data1$pixwbcode, data1$pixcluster)

#Column 6
model6<-glm(l0708d~centr_tribe+setpatt+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster), data=data1, family="binomial")
summary(model6)
mclx(model6, 1, data1$pixwbcode, data1$pixcluster)

#Column 7
model7<-glm(l0708d~centr_tribe+setpatt+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1, family="binomial")
summary(model7)
mclx(model7, 1, data1$pixwbcode, data1$pixcluster)

#Column 8
model8<-glm(l0708d~centr_tribe+setpatt+lnpd0+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1, family="binomial")
summary(model8)
mclx(model8, 1, data1$pixwbcode, data1$pixcluster)


############################
#######Table 5: OVB 3#######
############################

#Column 1
model1<-lm(l0708d~gr+setpatt+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1)
summary(model1)
mclx(model1, 1, data1$pixwbcode, data1$pixcluster)

#Column 2
model2<-lm(l0708d~gr+setpatt+lnpd0+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1)
summary(model2)
mclx(model2, 1, data1$pixwbcode, data1$pixcluster)

#Column 3
model3<-glm(l0708d~gr+setpatt+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1, family="binomial")
summary(model3)
mclx(model3, 1, data1$pixwbcode, data1$pixcluster)

#Column 4
model4<-glm(l0708d~gr+setpatt+lnpd0+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1, family="binomial")
summary(model4)
mclx(model4, 1, data1$pixwbcode, data1$pixcluster)

#Column 5
model5<-lm(l0708d~petty+paramount+state+setpatt+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1)
summary(model5)
mclx(model5, 1, data1$pixwbcode, data1$pixcluster)

#Column 6
model6<-lm(l0708d~petty+paramount+state+setpatt+lnpd0+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1)
summary(model6)
mclx(model6, 1, data1$pixwbcode, data1$pixcluster)

#Column 7
model7<-glm(l0708d~petty+paramount+state+setpatt+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1, family="binomial")
summary(model7)
mclx(model7, 1, data1$pixwbcode, data1$pixcluster)

#Column 8
model8<-glm(l0708d~petty+paramount+state+setpatt+lnpd0+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1, family="binomial")
summary(model8)
mclx(model8, 1, data1$pixwbcode, data1$pixcluster)

##########################################
#####TABLE 6: POP DENSITY AS OUTCOME######
##########################################

#Column 1
model1<-lm(lnpd0~centr_tribe+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixwbcode), data=data1)
summary(model1)
mclx(model1, 1, data1$pixwbcode, data1$pixcluster)

#Column 2
model2<-lm(lnpd0~centr_tribe+setpatt+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixwbcode), data=data1)
summary(model2)
mclx(model2, 1, data1$pixwbcode, data1$pixcluster)

#Column 3
model3<-lm(lnpd0~centr_tribe+setpatt+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster), data=data1)
summary(model3)
mclx(model3, 1, data1$pixwbcode, data1$pixcluster)

#Column 4
model4<-lm(lnpd0~centr_tribe+setpatt+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1)
summary(model4)
mclx(model4, 1, data1$pixwbcode, data1$pixcluster)


#############################################################
#####################CONTINUOUS DV###########################
#############################################################
  
##########################
#######Table 7: OVB#######
##########################



#Column 1
model1<-lm(lnl0708s~centr_tribe+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixwbcode), data=data1)
summary(model1)
mclx(model1, 1, data1$pixwbcode, data1$pixcluster)

#Column 2
model2<-lm(lnl0708s~centr_tribe+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster), data=data1)
summary(model2)
mclx(model2, 1, data1$pixwbcode, data1$pixcluster)

#Column 3
model3<-lm(lnl0708s~centr_tribe+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1)
summary(model3)
mclx(model3, 1, data1$pixwbcode, data1$pixcluster)

#Column 4
model4<-lm(lnl0708s~centr_tribe+lnpd0+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1)
summary(model4)
mclx(model4, 1, data1$pixwbcode, data1$pixcluster)

#Column 5
model5<-lm(lnl0708s~centr_tribe+setpatt+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixwbcode), data=data1)
summary(model5)
mclx(model5, 1, data1$pixwbcode, data1$pixcluster)

#Column 6
model6<-lm(lnl0708s~centr_tribe+setpatt+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster), data=data1)
summary(model6)
mclx(model6, 1, data1$pixwbcode, data1$pixcluster)

#Column 7
model7<-lm(lnl0708s~centr_tribe+setpatt+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1)
summary(model7)
mclx(model7, 1, data1$pixwbcode, data1$pixcluster)

#Column 8
model8<-lm(lnl0708s~centr_tribe+setpatt+lnpd0+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1)
summary(model8)
mclx(model8, 1, data1$pixwbcode, data1$pixcluster)

###############################################
########Table 8:  post treatment bias##########
###############################################

model0<-lm(lnl0708s~lnpd0+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1)
data1$lnl0708s_adj<-data1$lnl0708s-model0$coefficient[2]*data1$lnpd0

#Column 1
model1<-lm(lnl0708s~centr_tribe+lnpd0+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1)
summary(model1)
mclx(model1, 1, data1$pixwbcode, data1$pixcluster)

#Column 2
model2<-lm(lnl0708s_adj~centr_tribe+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1)
summary(model2)
mclx(model2, 1, data1$pixwbcode, data1$pixcluster)

#Column 3
model3<-lm(lnl0708s~centr_tribe+setpatt+lnpd0+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1)
summary(model3)
mclx(model3, 1, data1$pixwbcode, data1$pixcluster)

#Column 4
model4<-lm(lnl0708s_adj~centr_tribe+setpatt+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1)
summary(model4)
mclx(model4, 1, data1$pixwbcode, data1$pixcluster)

#Column 5
model5<-lm(lnl0708s~petty+paramount+state+lnpd0+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1)
summary(model5)
mclx(model5, 1, data1$pixwbcode, data1$pixcluster)

#Column 6
model6<-lm(lnl0708s_adj~petty+paramount+state+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1)
summary(model6)
mclx(model6, 1, data1$pixwbcode, data1$pixcluster)

#Column 7
model7<-lm(lnl0708s~petty+paramount+state+setpatt+lnpd0+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1)
summary(model7)
mclx(model7, 1, data1$pixwbcode, data1$pixcluster)

#Column 8
model8<-lm(lnl0708s_adj~petty+paramount+state+setpatt+lnkm+pixpetro+pixdia+pixwaterd+pixcapdist+pixmal+pixsead+pixsuit+pixelev+pixbdist+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(data1$pixcluster)+factor(data1$pixwbcode), data=data1)
summary(model8)
mclx(model8, 1, data1$pixwbcode, data1$pixcluster)


###################################################################
##################ETHNIC-HOMELAND LEVEL ANALYSIS###################
###################################################################

data2<-read.dta("ethnicity-country-data-final.dta")
data2<-data2[-which(data2$wbcode=="SWZ"),]
data2<-data2[-which(data2$cluster=="Shona and Thonga - Shona"),]
data2<-data2[-which(data2$cluster=="Plateu Nigerians - Kwa"),]
data2<-data2[-which(data2$cluster=="Negroes of the Sudan Fringe - Bagirmi Province"),]
data2<-data2[-which(data2$cluster=="Central Bantu - Kimbundu"),]

data3<-data2[which(data2$lnlights0708!="NA"),]

data2$setpatt<-data2$settle_patt
data3$setpatt<-data3$settle_patt

data2$petty<-data2$centr_tribe==1
data2$paramount<-data2$centr_tribe==2
data2$state<-data2$centr_tribe==3 | data2$centr_tribe==4

data3$petty<-data3$centr_tribe==1
data3$paramount<-data3$centr_tribe==2
data3$state<-data3$centr_tribe==3 | data3$centr_tribe==4

##############################################
###########Table 10: OVB 1####################
##############################################

#Column 1
model1<-lm(lnlights_nozeros~centr_tribe+lnpdmean00s+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(wbcode),data=data2)
summary(model1)
mclx(model1,dfcw=1,data2$wbcode,data2$cluster)

#Column 2
model2<-lm(lnlights_nozeros~centr_tribe+lnpdmean00s+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(cluster),data=data2)
summary(model2)
mclx(model2,dfcw=1,data2$wbcode,data2$cluster)

#Column 3
model3<-lm(lnlights0708~centr_tribe+lnpdmean00s+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(wbcode),data=data3)
summary(model3)
mclx(model3,dfcw=1,data3$wbcode,data3$cluster)

#Column 4
model4<-lm(lnlights0708~centr_tribe+lnpdmean00s+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(cluster),data=data3)
summary(model4)
mclx(model4,dfcw=1,data3$wbcode,data3$cluster)

##################################
#########Table 11: OVB 2: #########
##################################

#Column 1
model1<-lm(lnlights_nozeros~centr_tribe+setpatt+lnpdmean00s+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(wbcode),data=data2)
summary(model1)
mclx(model1,dfcw=1,data2$wbcode,data2$cluster)

#Column 2
model2<-lm(lnlights_nozeros~centr_tribe+setpatt+lnpdmean00s+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(cluster),data=data2)
summary(model2)
mclx(model2,dfcw=1,data2$wbcode,data2$cluster)

#Column 3
model3<-lm(lnlights0708~centr_tribe+setpatt+lnpdmean00s+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(wbcode),data=data3)
summary(model3)
mclx(model3,dfcw=1,data3$wbcode,data3$cluster)

#Column 4
model4<-lm(lnlights0708~centr_tribe+setpatt+lnpdmean00s+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(cluster),data=data3)
summary(model4)
mclx(model4,dfcw=1,data3$wbcode,data3$cluster)

#################################################
##########Table 12:  post treatment bias#########
#################################################


model0<-lm(lnlights_nozeros~lnpdmean00s+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(wbcode),data=data2)
data2$lnlights_nozeros_adj<-data2$lnlights_nozeros-model0$coefficient[2]*data2$lnpdmean00s

model0<-lm(lnlights0708~lnpdmean00s+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(wbcode),data=data3)
data3$lnlights0708_adj<-data3$lnlights0708-model0$coefficient[2]*data3$lnpdmean00s

#Column 1
model1<-lm(lnlights_nozeros~centr_tribe+lnpdmean00s+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(wbcode),data=data2)
summary(model1)
mclx(model1,dfcw=1,data2$wbcode,data2$cluster)

#Column 2
model2<-lm(lnlights_nozeros_adj~centr_tribe+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(wbcode),data=data2)
summary(model2)
mclx(model2,dfcw=1,data2$wbcode,data2$cluster)

#Column 3
model3<-lm(lnlights_nozeros_adj~centr_tribe+setpatt+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(wbcode),data=data2)
summary(model3)
mclx(model3,dfcw=1,data2$wbcode,data2$cluster)

#Column 4
model4<-lm(lnlights_nozeros_adj~centr_tribe+setpatt+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(cluster),data=data2)
summary(model4)
mclx(model4,dfcw=1,data2$wbcode,data2$cluster)

#Column 5
model5<-lm(lnlights0708_adj~centr_tribe+lnpdmean00s+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(wbcode),data=data3)
summary(model5)
mclx(model5,dfcw=1,data3$wbcode,data3$cluster)

#Column 6
model6<-lm(lnlights0708_adj~centr_tribe+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(wbcode),data=data3)
summary(model6)
mclx(model6,dfcw=1,data3$wbcode,data3$cluster)

#Column 7
model7<-lm(lnlights0708_adj~centr_tribe+setpatt+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(wbcode),data=data3)
summary(model7)
mclx(model7,dfcw=1,data3$wbcode,data3$cluster)

#Column 8
model8<-lm(lnlights0708_adj~centr_tribe+setpatt+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(cluster),data=data3)
summary(model8)
mclx(model8,dfcw=1,data3$wbcode,data3$cluster)

#####################################################################
##############Table 13: Population density as outcome variable#######
#####################################################################

#Column 1
model1<-lm(lnpdmean00s~centr_tribe+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(wbcode),data=data2)
summary(model1)
mclx(model1,dfcw=1,data2$wbcode,data2$cluster)

#Column 2
model2<-lm(lnpdmean00s~centr_tribe+setpatt+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(wbcode),data=data2)
summary(model2)
mclx(model2,dfcw=1,data2$wbcode,data2$cluster)

#Column 3
model3<-lm(lnpdmean00s~centr_tribe+setpatt+capdistance1+seadist1+borderdist1+lnwaterkm+lnkm2split+mean_elev+mean_suit+malariasuit+petroleum+diamondd+factor(cluster),data=data2)
summary(model3)
mclx(model3,dfcw=1,data2$wbcode,data2$cluster)

######################################################################################################
############################### WITHIN COUNTRY ANALYSIS ##############################################
######################################################################################################


########################################################## TABLE 9 #########################################################
############################################## DRC ###########################################

### Load individual and household DHS data subsets (for clusters within a 5 km radius of ethnic homeland borders)

load("edrch2.clust5km")
load("edrci2.clust5km")

#### ANALYSIS (HOUSEHOLD) #####

### Piped water

modpipedrc<-lm(piped~centr_tribe,weights=numhh,data=edrch2.clust)
summary(modpipedrc)

### controlling for population density

modpipedrc2<-lm(piped~centr_tribe+lnpd00,weights=numhh,data=edrch2.clust)
summary(modpipedrc2)

### Bootstrapping

set.seed(02139)
sims<-1000
statpipe.drc<-c()
size<-length(unique(edrch2.clust$TRIBE))
list.id<-tapply(1:nrow(edrch2.clust),edrch2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-edrch2.clust[new.row,]
  mod<-lm(piped~centr_tribe,weights=numhh,data=s)
  statpipe.drc[i]<-mod$coeff[2]
}

estpipe.drc<-mean(statpipe.drc)
sepipe.drc<-sd(statpipe.drc)
tpipe.drc<-estpipe.drc/sepipe.drc
tpipe.drc

pvalpipe.drc<-2*(1-pt(abs(tpipe.drc),df=size-1))

respipe.drc<-c(estpipe.drc,sepipe.drc,pvalpipe.drc)

### Bootstrapping (controlling for population density)

set.seed(02139)
sims<-1000
statpipe.drc2<-c()
statpipepop.drc<-c()
size<-length(unique(edrch2.clust$TRIBE))
list.id<-tapply(1:nrow(edrch2.clust),edrch2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-edrch2.clust[new.row,]
  mod<-lm(piped~centr_tribe+lnpd00,weights=numhh,data=s)
  statpipe.drc2[i]<-mod$coeff[2]
  statpipepop.drc[i]<-mod$coeff[3]
}

estpipe.drc2<-mean(statpipe.drc2)
sepipe.drc2<-sd(statpipe.drc2)
tpipe.drc2<-estpipe.drc2/sepipe.drc2

pvalpipe.drc2<-2*(1-pt(abs(tpipe.drc2),df=size-1))

respipe.drc2<-c(estpipe.drc2,sepipe.drc2,pvalpipe.drc2)

estpipepop.drc<-mean(statpipepop.drc)
sepipepop.drc<-sd(statpipepop.drc)
tpipepop.drc<-estpipepop.drc/sepipepop.drc

pvalpipepop.drc<-2*(1-pt(abs(tpipepop.drc),df=size-1))

respipepop.drc<-c(estpipepop.drc,sepipepop.drc,pvalpipepop.drc)

### Electricity

modelecdrc<-lm(elec~centr_tribe,weights=numhh,data=edrch2.clust)
summary(modelecdrc)

modelecdrc2<-lm(elec~centr_tribe+lnpd00,weights=numhh,data=edrch2.clust)
summary(modelecdrc2) 

### Bootstrapping

set.seed(02139)
sims<-1000
statelec.drc<-c()
size<-length(unique(edrch2.clust$TRIBE))
list.id<-tapply(1:nrow(edrch2.clust),edrch2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-edrch2.clust[new.row,]
  mod<-lm(elec~centr_tribe,weights=numhh,data=s)
  statelec.drc[i]<-mod$coeff[2]
}

estelec.drc<-mean(statelec.drc)
seelec.drc<-sd(statelec.drc)
telec.drc<-estelec.drc/seelec.drc

pvalelec.drc<-2*(1-pt(abs(telec.drc),df=size-1))

reselec.drc<-c(estelec.drc,seelec.drc,pvalelec.drc)

### Bootstrapping (controlling for population density)

set.seed(02139)
sims<-1000
statelec.drc2<-c()
statelecpop.drc<-c()
size<-length(unique(edrch2.clust$TRIBE))
list.id<-tapply(1:nrow(edrch2.clust),edrch2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-edrch2.clust[new.row,]
  mod<-lm(elec~centr_tribe+lnpd00,weights=numhh,data=s)
  statelec.drc2[i]<-mod$coeff[2]
  statelecpop.drc[i]<-mod$coeff[3]
}

estelec.drc2<-mean(statelec.drc2)
seelec.drc2<-sd(statelec.drc2)
telec.drc2<-estelec.drc2/seelec.drc2

pvalelec.drc2<-2*(1-pt(abs(telec.drc2),df=size-1))

reselec.drc2<-c(estelec.drc2,seelec.drc2,pvalelec.drc2)
reselec.drc2

estelecpop.drc<-mean(statelecpop.drc)
estelecpop.drc
seelecpop.drc<-sd(statelecpop.drc)
telecpop.drc<-estelecpop.drc/seelecpop.drc

pvalelecpop.drc<-2*(1-pt(abs(telecpop.drc),df=size-1))

reselecpop.drc<-c(estelecpop.drc,seelecpop.drc,pvalelecpop.drc)

### Wealth

modwealthdrc<-lm(wealthcat~centr_tribe,weights=numhh,data=edrch2.clust)
summary(modwealthdrc)

modwealthdrc2<-lm(wealthcat~centr_tribe+lnpd00,weights=numhh,data=edrch2.clust)
summary(modwealthdrc2)

### Bootstrapping 

set.seed(02139)
sims<-1000
statwealthcat.drc<-c()
size<-length(unique(edrch2.clust$TRIBE))
list.id<-tapply(1:nrow(edrch2.clust),edrch2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-edrch2.clust[new.row,]
  clustnum<-unlist(numhh.drc[s$DHSCLUST])
  clustweights<-1/clustnum
  mod<-lm(wealthcat~centr_tribe,weights=numhh,data=s)
  statwealthcat.drc[i]<-mod$coeff[2]
}

estwealthcat.drc<-mean(statwealthcat.drc)
sewealthcat.drc<-sd(statwealthcat.drc)
twealthcat.drc<-estwealthcat.drc/sewealthcat.drc

pvalwealthcat.drc<-2*(1-pt(abs(twealthcat.drc),df=size-1))

reswealthcat.drc<-c(estwealthcat.drc,sewealthcat.drc,pvalwealthcat.drc)

sort(statwealthcat.drc)[100]

### Bootstrapping (controlling for population density)

set.seed(02139)
sims<-1000
statwealthcat.drc2<-c()
statwealthcatpop.drc<-c()
size<-length(unique(edrch2.clust$TRIBE))
list.id<-tapply(1:nrow(edrch2.clust),edrch2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-edrch2.clust[new.row,]
  clustnum<-unlist(numhh.drc[s$DHSCLUST])
  clustweights<-1/clustnum
  mod<-lm(wealthcat~centr_tribe+lnpd00,weights=numhh,data=s)
  statwealthcat.drc2[i]<-mod$coeff[2]
  statwealthcatpop.drc[i]<-mod$coeff[3]
}

estwealthcat.drc2<-mean(statwealthcat.drc2)
sewealthcat.drc2<-sd(statwealthcat.drc2)
twealthcat.drc2<-estwealthcat.drc2/sewealthcat.drc2

pvalwealthcat.drc2<-2*(1-pt(abs(twealthcat.drc2),df=size-1))

reswealthcat.drc2<-c(estwealthcat.drc2,sewealthcat.drc2,pvalwealthcat.drc2)

estwealthcatpop.drc<-mean(statwealthcatpop.drc)
sewealthcatpop.drc<-sd(statwealthcatpop.drc)
twealthcatpop.drc<-estwealthcatpop.drc/sewealthcatpop.drc

pvalwealthcatpop.drc<-2*(1-pt(abs(twealthcatpop.drc),df=size-1))

reswealthcatpop.drc<-c(estwealthcatpop.drc,sewealthcatpop.drc,pvalwealthcatpop.drc)


### ANALYSIS (INDIVIDUAL) ###

### Education

modedudrc<-lm(edu~centr_tribe,weights=numi,data=edrci2.clust)
summary(modedudrc)

modedudrc2<-lm(edu~centr_tribe+lnpd00,weights=numi,data=edrci2.clust)
summary(modedudrc2)

### Bootstrapping

set.seed(02139)
sims<-1000
statedu.drc<-c()
size<-length(unique(edrci2.clust$TRIBE))
list.id<-tapply(1:nrow(edrci2.clust),edrci2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-edrci2.clust[new.row,]
  mod<-lm(edu~centr_tribe,weights=numi,data=s)
  statedu.drc[i]<-mod$coeff[2]
}

estedu.drc<-mean(statedu.drc)
seedu.drc<-sd(statedu.drc)
tedu.drc<-estedu.drc/seedu.drc

pvaledu.drc<-2*(1-pt(abs(tedu.drc),df=size-1))

resedu.drc<-c(estedu.drc,seedu.drc,pvaledu.drc)

### Bootstrapping (controlling for population density)

set.seed(02139)
sims<-1000
statedu.drc2<-c()
statedupop.drc<-c()
size<-length(unique(edrci2.clust$TRIBE))
list.id<-tapply(1:nrow(edrci2.clust),edrci2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-edrci2.clust[new.row,]
  mod<-lm(edu~centr_tribe+lnpd00,weights=numi,data=s)
  statedu.drc2[i]<-mod$coeff[2]
  statedupop.drc[i]<-mod$coeff[3]
}

estedu.drc2<-mean(statedu.drc2)
seedu.drc2<-sd(statedu.drc2)
tedu.drc2<-estedu.drc2/seedu.drc2

pvaledu.drc2<-2*(1-pt(abs(tedu.drc2),df=size-1))

resedu.drc2<-c(estedu.drc2,seedu.drc2,pvaledu.drc2)


estedupop.drc<-mean(statedupop.drc)
seedupop.drc<-sd(statedupop.drc)
tedupop.drc<-estedupop.drc/seedupop.drc

pvaledupop.drc<-2*(1-pt(abs(tedupop.drc),df=size-1))
resedupop.drc<-c(estedupop.drc,seedupop.drc,pvaledupop.drc)


### Literacy

modlitdrc<-lm(lit~centr_tribe,weights=numi,data=edrci2.clust)
summary(modlitdrc) 

modlitdrc2<-lm(lit~centr_tribe+lnpd00,weights=numi,data=edrci2.clust)
summary(modlitdrc2)

### Bootstrapping

set.seed(02139)
sims<-1000
statlit.drc<-c()
size<-length(unique(edrci2.clust$TRIBE))
list.id<-tapply(1:nrow(edrci2.clust),edrci2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-edrci2.clust[new.row,]
  mod<-lm(lit~centr_tribe,weights=numi,data=s)
  statlit.drc[i]<-mod$coeff[2]
}

estlit.drc<-mean(statlit.drc)
selit.drc<-sd(statlit.drc)
tlit.drc<-estlit.drc/selit.drc


pvallit.drc<-2*(1-pt(abs(tlit.drc),df=size-1))

reslit.drc<-c(estlit.drc,selit.drc,pvallit.drc)

### Bootstrapping (controlling for population density)

set.seed(02139)
sims<-1000
statlit.drc2<-c()
statlitpop.drc<-c()
size<-length(unique(edrci2.clust$TRIBE))
list.id<-tapply(1:nrow(edrci2.clust),edrci2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-edrci2.clust[new.row,]
  mod<-lm(lit~centr_tribe+lnpd00,weights=numi,data=s)
  statlit.drc2[i]<-mod$coeff[2]
  statlitpop.drc[i]<-mod$coeff[3]
}

estlit.drc2<-mean(statlit.drc2)
selit.drc2<-sd(statlit.drc2)
tlit.drc2<-estlit.drc2/selit.drc2

pvallit.drc2<-2*(1-pt(abs(tlit.drc2),df=size-1))

reslit.drc2<-c(estlit.drc2,selit.drc2,pvallit.drc2)

estlitpop.drc<-mean(statlitpop.drc)
selitpop.drc<-sd(statlitpop.drc)
tlitpop.drc<-estlitpop.drc/selitpop.drc

pvallitpop.drc<-2*(1-pt(abs(tlitpop.drc),df=size-1))

reslitpop.drc<-c(estlitpop.drc,selitpop.drc,pvallitpop.drc)

### Child mortality

modmortdrc<-lm(mort~centr_tribe,weights=numi,data=edrci2.clust)
summary(modmortdrc)

modmortdrc2<-lm(mort~centr_tribe+lnpd00,weights=numi,data=edrci2.clust)
summary(modmortdrc2)

### Bootstrapping

set.seed(02139)
sims<-1000
statmort.drc<-c()
size<-length(unique(edrci2.clust$TRIBE))
list.id<-tapply(1:nrow(edrci2.clust),edrci2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-edrci2.clust[new.row,]
  mod<-lm(mort~centr_tribe,weights=numi,data=s)
  statmort.drc[i]<-mod$coeff[2]
}

estmort.drc<-mean(statmort.drc)
semort.drc<-sd(statmort.drc)
tmort.drc<-estmort.drc/semort.drc


pvalmort.drc<-2*(1-pt(abs(tmort.drc),df=size-1))

resmort.drc<-c(estmort.drc,semort.drc,pvalmort.drc)
resmort.drc

### Bootstrapping (controlling for population density)

set.seed(02139)
sims<-1000
statmort.drc2<-c()
statmortpop.drc<-c()
size<-length(unique(edrci2.clust$TRIBE))
list.id<-tapply(1:nrow(edrci2.clust),edrci2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-edrci2.clust[new.row,]
  mod<-lm(mort~centr_tribe+lnpd00,weights=numi,data=s)
  statmort.drc2[i]<-mod$coeff[2]
  statmortpop.drc[i]<-mod$coeff[3]
}

estmort.drc2<-mean(statmort.drc2)
semort.drc2<-sd(statmort.drc2)
tmort.drc2<-estmort.drc2/semort.drc2


pvalmort.drc2<-2*(1-pt(abs(tmort.drc2),df=size-1))

resmort.drc2<-c(estmort.drc2,semort.drc2,pvalmort.drc2)


estmortpop.drc<-mean(statmortpop.drc)
semortpop.drc<-sd(statmortpop.drc)
tmortpop.drc<-estmortpop.drc/semortpop.drc


pvalmortpop.drc<-2*(1-pt(abs(tmortpop.drc),df=size-1))

resmortpop.drc<-c(estmortpop.drc,semortpop.drc,pvalmortpop.drc)

### Making the table

stargazer(modwealthdrc,modwealthdrc2,modpipedrc,modpipedrc2,modelecdrc,modelecdrc2,modedudrc,modedudrc2,modlitdrc,modlitdrc2,modmortdrc,modmortdrc2)

## bootstrap standard errors
reswealthcat.drc[2]
reswealthcat.drc2[2]
reswealthcatpop.drc[2]

respipe.drc[2]
respipe.drc2[2]
respipepop.drc[2]

reselec.drc2[2]
reselec.drc2[2]
reselecpop.drc[2]

resedu.drc[2]
resedu.drc2[2]
resedupop.drc[2]

reslit.drc[2]
reslit.drc2[2]
reslitpop.drc[2]

resmort.drc [2]
resmort.drc2 [2]
resmortpop.drc [2]

########################################################## TABLE 10 #########################################################
############################################## NIGERIA ###########################################

load("engah2.clust5km")
load("engai2.clust5km")

#### ANALYSIS (HOUSEHOLD) #####

### Piped water

modpipenga<-lm(piped~centr_tribe,weights=numhh,data=engah2.clust)
summary(modpipenga) ## not significant
plot(modpipenga)

modpipenga2<-lm(piped~centr_tribe+lnpd00,weights=numhh,data=engah2.clust)
summary(modpipenga2)
plot(modpipenga2)

### Bootstrapping

set.seed(02139)
sims<-1000
statpipe.nga<-c()
size<-length(unique(engah2.clust$TRIBE))
list.id<-tapply(1:nrow(engah2.clust),engah2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-engah2.clust[new.row,]
  mod<-lm(piped~centr_tribe,weights=numhh,data=s)
  statpipe.nga[i]<-mod$coeff[2]
}

estpipe.nga<-mean(na.omit(statpipe.nga))
sepipe.nga<-sd(na.omit(statpipe.nga))
tpipe.nga<-estpipe.nga/sepipe.nga

pvalpipe.nga<-2*(1-pt(abs(tpipe.nga),df=size-1))

respipe.nga<-c(estpipe.nga,sepipe.nga,pvalpipe.nga)

### Bootstrapping (controlling for population density)

set.seed(02139)
sims<-1000
statpipe.nga2<-c()
statpipepop.nga<-c()
size<-length(unique(engah2.clust$TRIBE))
list.id<-tapply(1:nrow(engai2.clust),engah2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-engah2.clust[new.row,]
  mod<-lm(piped~centr_tribe+lnpd00,weights=numhh,data=s)
  statpipe.nga2[i]<-mod$coeff[2]
  statpipepop.nga[i]<-mod$coeff[3]
}

estpipe.nga2<-mean(statpipe.nga2)
sepipe.nga2<-sd(statpipe.nga2)
tpipe.nga2<-estpipe.nga2/sepipe.nga2

pvalpipe.nga2<-2*(1-pt(abs(tpipe.nga2),df=size-1))

respipe.nga2<-c(estpipe.nga2,sepipe.nga2,pvalpipe.nga2)


estpipepop.nga<-mean(statpipepop.nga)
sepipepop.nga<-sd(statpipepop.nga)
tpipepop.nga<-estpipepop.nga/sepipepop.nga

pvalpipepop.nga<-2*(1-pt(abs(tpipepop.nga),df=size-1))

respipepop.nga<-c(estpipepop.nga,sepipepop.nga,pvalpipepop.nga)


### Electricity

modelecnga<-lm(elec~centr_tribe,weights=numhh,data=engah2.clust)
summary(modelecnga) 

modelecnga2<-lm(elec~centr_tribe+lnpd00,weights=numhh,data=engah2.clust)
summary(modelecnga2) 

### Bootstrapping 

set.seed(02139)
sims<-1000
statelec.nga<-c()
size<-length(unique(engah2.clust$TRIBE))
list.id<-tapply(1:nrow(engah2.clust),engah2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-engah2.clust[new.row,]
  mod<-lm(elec~centr_tribe,weights=numhh,data=s)
  statelec.nga[i]<-mod$coeff[2]
}

estelec.nga<-mean(statelec.nga)
seelec.nga<-sd(statelec.nga)
telec.nga<-estelec.nga/seelec.nga

pvalelec.nga<-2*(1-pt(abs(telec.nga),df=size-1))

reselec.nga<-c(estelec.nga,seelec.nga,pvalelec.nga)


### Bootstrapping (controlling for population density)

set.seed(02139)
sims<-1000
statelec.nga2<-c()
statelecpop.nga2<-c()
size<-length(unique(engah2.clust$TRIBE))
list.id<-tapply(1:nrow(engah2.clust),engah2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-engah2.clust[new.row,]
  mod<-lm(elec~centr_tribe+lnpd00,weights=numhh,data=s)
  statelec.nga2[i]<-mod$coeff[2]
  statelecpop.nga2[i]<-mod$coeff[3]
}

estelec.nga2<-mean(statelec.nga2)
seelec.nga2<-sd(statelec.nga2)
telec.nga2<-estelec.nga2/seelec.nga2

pvalelec.nga2<-2*(1-pt(abs(telec.nga2),df=size-1))

reselec.nga2<-c(estelec.nga2,seelec.nga2,pvalelec.nga2)


estelecpop.nga2<-mean(statelecpop.nga2)
seelecpop.nga2<-sd(statelecpop.nga2)
telecpop.nga2<-estelecpop.nga2/seelec.nga2

pvalelecpop.nga2<-2*(1-pt(abs(telecpop.nga2),df=size-1))

reselecpop.nga2<-c(estelecpop.nga2,seelecpop.nga2,pvalelecpop.nga2)

### Wealt hcat

modwealthcatnga<-lm(wealthcat~centr_tribe,data=engah2.clust)
summary(modwealthcatnga)

modwealthcatnga2<-lm(wealthcat~centr_tribe+lnpd00,data=engah2.clust)
summary(modwealthcatnga2)

### Bootstrapping

set.seed(02139)
sims<-1000
statwealthcat.nga<-c()
size<-length(unique(engah2.clust$TRIBE))
list.id<-tapply(1:nrow(engah2.clust),engah2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])  
  s<-engah2.clust[new.row,]
  mod<-lm(wealthcat~centr_tribe,weights=numhh,data=s)
  statwealthcat.nga[i]<-mod$coeff[2]
}

estwealthcat.nga<-mean(statwealthcat.nga)
sewealthcat.nga<-sd(statwealthcat.nga)
twealthcat.nga<-estwealthcat.nga/sewealthcat.nga

pvalwealthcat.nga<-2*(1-pt(abs(twealthcat.nga),df=size-1))

reswealthcat.nga<-c(estwealthcat.nga,sewealthcat.nga,pvalwealthcat.nga)

#### Bootstrapping controlling for population denisty

set.seed(02139)
sims<-1000
statwealthcat.nga2<-c()
statwealthcatpop.nga2<-c()
size<-length(unique(engah2.clust$TRIBE))
list.id<-tapply(1:nrow(engah2.clust),engah2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-engah2.clust[new.row,]
  mod<-lm(wealthcat~centr_tribe+lnpd00,weights=numhh,data=s)
  statwealthcat.nga2[i]<-mod$coeff[2]
  statwealthcatpop.nga2[i]<-mod$coeff[3]
}

estwealthcat.nga2<-mean(statwealthcat.nga2)
sewealthcat.nga2<-sd(statwealthcat.nga2)
twealthcat.nga2<-estwealthcat.nga2/sewealthcat.nga2

pvalwealthcat.nga2<-2*(1-pt(abs(twealthcat.nga2),df=size-1))

reswealthcat.nga2<-c(estwealthcat.nga2,sewealthcat.nga2,pvalwealthcat.nga2)


estwealthcatpop.nga2<-mean(statwealthcatpop.nga2)
sewealthcatpop.nga2<-sd(statwealthcatpop.nga2)
twealthcatpop.nga2<-estwealthcatpop.nga2/sewealthcat.nga2

pvalwealthcatpop.nga2<-2*(1-pt(abs(twealthcatpop.nga2),df=size-1))

reswealthcatpop.nga2<-c(estwealthcatpop.nga2,sewealthcatpop.nga2,pvalwealthcatpop.nga2)


### ANALYSIS (INDIVIDUAL) ###

### Education

modedunga<-lm(edu~centr_tribe,weights=numi,data=engai2.clust)
summary(modedunga)

modedunga2<-lm(edu~centr_tribe+lnpd00,weights=numi,data=engai2.clust)
summary(modedunga2)

### Bootstrapping

set.seed(02139)
sims<-1000
statedu.nga<-c()
size<-length(unique(engai2.clust$TRIBE))
list.id<-tapply(1:nrow(engai2.clust),engai2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-engai2.clust[new.row,]
  mod<-lm(edu~centr_tribe,weights=numi,data=s)
  statedu.nga[i]<-mod$coeff[2]
}

estedu.nga<-mean(statedu.nga)
seedu.nga<-sd(statedu.nga)
tedu.nga<-estedu.nga/seedu.nga

pvaledu.nga<-2*(1-pt(abs(tedu.nga),df=size-1))

resedu.nga<-c(estedu.nga,seedu.nga,pvaledu.nga)

### Bootstrapping (controlling for population density)

set.seed(02139)
sims<-1000
statedu.nga<-c()
statedupop.nga<-c()
size<-length(unique(engai2.clust$TRIBE))
list.id<-tapply(1:nrow(engai2.clust),engai2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-engai2.clust[new.row,]
  mod<-lm(edu~centr_tribe+lnpd00,weights=numi,data=s)
  statedu.nga2[i]<-mod$coeff[2]
  statedupop.nga[i]<-mod$coeff[3]
}

estedu.nga2<-mean(statedu.nga2)
seedu.nga2<-sd(statedu.nga2)
tedu.nga2<-estedu.nga2/seedu.nga2

pvaledu.nga2<-2*(1-pt(abs(tedu.nga2),df=size-1))

resedu.nga2<-c(estedu.nga2,seedu.nga2,pvaledu.nga2)

estedupop.nga<-mean(statedupop.nga)
seedupop.nga<-sd(statedupop.nga)
tedupop.nga<-estedupop.nga/seedupop.nga

pvaledupop.nga<-2*(1-pt(abs(tedupop.nga),df=size-1))

resedupop.nga<-c(estedupop.nga,seedupop.nga,pvaledupop.nga)


### Literacy

modlitnga<-lm(lit~centr_tribe,weights=numi,data=engai2.clust)
summary(modlitnga) 

modlitnga2<-lm(lit~centr_tribe+lnpd00,weights=numi,data=engai2.clust)
summary(modlitnga2)

### Bootstrapping

set.seed(02139)
sims<-1000
statlit.nga<-c()
size<-length(unique(engai2.clust$TRIBE))
list.id<-tapply(1:nrow(engai2.clust),engai2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-engai2.clust[new.row,]
  mod<-lm(lit~centr_tribe,weights=numi,data=s)
  statlit.nga[i]<-mod$coeff[2]
}

estlit.nga<-mean(na.omit(statlit.nga))
selit.nga<-sd(na.omit(statlit.nga))
tlit.nga<-estlit.nga/selit.nga

pvallit.nga<-2*(1-pt(abs(tlit.nga),df=size-1))

reslit.nga<-c(estlit.nga,selit.nga,pvallit.nga)

### Bootstrapping controlling for population density

set.seed(02139)
sims<-1000
statlit.nga2<-c()
statlitpop.nga<-c()
size<-length(unique(engai2.clust$TRIBE))
list.id<-tapply(1:nrow(engai2.clust),engai2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-engai2.clust[new.row,]
  mod<-lm(lit~centr_tribe+lnpd00,weights=numi,data=s)
  statlit.nga2[i]<-mod$coeff[2]
  statlitpop.nga[i]<-mod$coeff[3]
}

estlit.nga2<-mean(statlit.nga2)
selit.nga2<-sd(statlit.nga2)
tlit.nga2<-estlit.nga2/selit.nga2

pvallit.nga2<-2*(1-pt(abs(tlit.nga2),df=size-1))

reslit.nga2<-c(estlit.nga2,selit.nga2,pvallit.nga2)

estlitpop.nga<-mean(statlitpop.nga)
selitpop.nga<-sd(statlitpop.nga)
tlitpop.nga<-estlitpop.nga/selitpop.nga

pvallitpop.nga<-2*(1-pt(abs(tlitpop.nga),df=size-1))

reslitpop.nga<-c(estlitpop.nga,selitpop.nga,pvallitpop.nga)


### Child mortality

modmortnga<-lm(mort~centr_tribe,weights=numi,data=engai2.clust)
summary(modmortnga) 

modmortnga2<-lm(mort~centr_tribe+lnpd00,weights=numi,data=engai2.clust)
summary(modmortnga2) 

### Bootstrapping

set.seed(02139)
sims<-1000
statmort.nga<-c()
size<-length(unique(engai2.clust$TRIBE))
list.id<-tapply(1:nrow(engai2.clust),engai2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-engai2.clust[new.row,]
  mod<-lm(mort~centr_tribe,weights=numi,data=s)
  statmort.nga[i]<-mod$coeff[2]
}

estmort.nga<-mean(na.omit(statmort.nga))
semort.nga<-sd(na.omit(statmort.nga))
tmort.nga<-estmort.nga/semort.nga

pvalmort.nga<-2*(1-pt(abs(tmort.nga),df=size-1))

resmort.nga<-c(estmort.nga,semort.nga,pvalmort.nga)

### Bootstrapping (controlling for population density)

set.seed(02139)
sims<-1000
statmort.nga2<-c()
statmortpop.nga<-c()
size<-length(unique(engai2.clust$TRIBE))
list.id<-tapply(1:nrow(engai2.clust),engai2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-engai2.clust[new.row,]
  mod<-lm(mort~centr_tribe+lnpd00,weights=numi,data=s)
  statmort.nga2[i]<-mod$coeff[2]
  statmortpop.nga[i]<-mod$coeff[3]
}

estmort.nga2<-mean(na.omit(statmort.nga2))
semort.nga2<-sd(na.omit(statmort.nga2))
tmort.nga2<-estmort.nga2/semort.nga2

pvalmort.nga2<-2*(1-pt(abs(tmort.nga2),df=size-1))

resmort.nga2<-c(estmort.nga2,semort.nga2,pvalmort.nga2)

estmortpop.nga<-mean(na.omit(statmortpop.nga))
semortpop.nga<-sd(na.omit(statmortpop.nga))
tmortpop.nga<-estmortpop.nga/semortpop.nga

pvalmortpop.nga<-2*(1-pt(abs(tmortpop.nga),df=size-1))

resmortpop.nga<-c(estmortpop.nga,semortpop.nga,pvalmortpop.nga)

### Making the table

stargazer(modwealthcatnga,modwealthcatnga2,modpipenga,modpipenga2,modelecnga,modelecnga2,modedunga,modedunga2,modlitnga,modlitnga2,modmortnga,modmortnga2)

reswealthcat.nga[2]
reswealthcat.nga2[2]
reswealthcatpop.nga2[2]

respipe.nga[2]
respipe.nga2[2]
respipepop.nga[2]

reselec.nga[2]
reselec.nga2[2]
reselecpop.nga2[2]

resedu.nga[2]
resedu.nga2[2]
resedupop.nga[2]

reslit.nga[2]
reslit.nga2[2]
reslitpop.nga[2]

resmort.nga[2]
resmort.nga2[2]
resmortpop.nga[2]


########################################################## TABLE 11 #########################################################
############################################## TANZANIA ###########################################

### Load individual and household DHS data subsets (for clusters within a 5 km radius of ethnic homeland borders)

load("etzah2.clust5km")
load("etzai2.clust5km")

#### ANALYSIS (HOUSEHOLD) #####

### Piped water

modpipetza<-lm(piped~centr_tribe,weights=numhh,data=etzah2.clust)
summary(modpipetza) 

modpipetza2<-lm(piped~centr_tribe+lnpd00,weights=numhh,data=etzah2.clust)
summary(modpipetza2)

### Bootstrapping

set.seed(02139)
sims<-1000
statpipe.tza<-c()
size<-length(unique(etzah2.clust$TRIBE))
list.id<-tapply(1:nrow(etzah2.clust),etzah2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-etzah2.clust[new.row,]
  mod<-lm(piped~centr_tribe,weights=numhh,data=s)
  statpipe.tza[i]<-mod$coeff[2]
}

estpipe.tza<-mean(na.omit(statpipe.tza))
sepipe.tza<-sd(na.omit(statpipe.tza))
tpipe.tza<-estpipe.tza/sepipe.tza

pvalpipe.tza<-2*(1-pt(abs(tpipe.tza),df=size-1))

respipe.tza<-c(estpipe.tza,sepipe.tza,pvalpipe.tza)

### Controlling for population density

set.seed(02139)
sims<-1000
statpipe.tza2<-c()
statpipepop.tza<-c()
size<-length(unique(etzah2.clust$TRIBE))
list.id<-tapply(1:nrow(etzah2.clust),etzah2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-etzah2.clust[new.row,]
  mod<-lm(piped~centr_tribe+lnpd00,weights=numhh,data=s)
  statpipe.tza2[i]<-mod$coeff[2]
  statpipepop.tza[i]<-mod$coeff[3]
}

estpipe.tza2<-mean(statpipe.tza2)
sepipe.tza2<-sd(statpipe.tza2)
tpipe.tza2<-estpipe.tza2/sepipe.tza2

pvalpipe.tza2<-2*(1-pt(abs(tpipe.tza2),df=size-1))

respipe.tza2<-c(estpipe.tza2,sepipe.tza2,pvalpipe.tza2)

estpipepop.tza<-mean(statpipepop.tza)
sepipepop.tza<-sd(statpipepop.tza)
tpipepop.tza<-estpipepop.tza/sepipepop.tza

pvalpipepop.tza<-2*(1-pt(abs(tpipepop.tza),df=size-1))

respipepop.tza<-c(estpipepop.tza,sepipepop.tza,pvalpipepop.tza)
respipepop.tza


### Electricity

modelectza<-lm(elec~centr_tribe,weights=numhh,data=etzah2.clust)
summary(modelectza) 

modelectza2<-lm(elec~centr_tribe+lnpd00,weights=numhh,data=etzah2.clust)
summary(modelectza2)

### Bootstrapping 

set.seed(02139)
sims<-1000
statelec.tza<-c()
size<-length(unique(etzah2.clust$TRIBE))
list.id<-tapply(1:nrow(etzah2.clust),etzah2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-etzah2.clust[new.row,]
  mod<-lm(elec~centr_tribe,weights=numhh,data=s)
  statelec.tza[i]<-mod$coeff[2]
}

estelec.tza<-mean(statelec.tza)
seelec.tza<-sd(statelec.tza)
telec.tza<-estelec.tza/seelec.tza

pvalelec.tza<-2*(1-pt(abs(telec.tza),df=size-1))

reselec.tza<-c(estelec.tza,seelec.tza,pvalelec.tza)

### Bootstrapping (controlling for population density)

set.seed(02139)
sims<-1000
statelec.tza2<-c()
statelecpop.tza<-c()
size<-length(unique(etzah2.clust$TRIBE))
list.id<-tapply(1:nrow(etzah2.clust),etzah2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-etzah2.clust[new.row,]
  mod<-lm(elec~centr_tribe+lnpd00,weights=numhh,data=s)
  statelec.tza2[i]<-mod$coeff[2]
  statelecpop.tza[i]<-mod$coeff[3]
}

estelec.tza2<-mean(statelec.tza2)
seelec.tza2<-sd(statelec.tza2)
telec.tza2<-estelec.tza2/seelec.tza2

pvalelec.tza2<-2*(1-pt(abs(telec.tza2),df=size-1))

reselec.tza2<-c(estelec.tza2,seelec.tza2,pvalelec.tza2)


estelecpop.tza<-mean(statelecpop.tza)
seelecpop.tza<-sd(statelecpop.tza)
telecpop.tza<-estelecpop.tza/seelecpop.tza

pvalelecpop.tza<-2*(1-pt(abs(telecpop.tza),df=size-1))

reselecpop.tza<-c(estelecpop.tza,seelecpop.tza,pvalelecpop.tza)


### Wealth cat

modwealthcattza<-lm(wealthcat~centr_tribe,weights=numhh,data=etzah2.clust)
summary(modwealthcattza)

modwealthcattza2<-lm(wealthcat~centr_tribe+lnpd00,weights=numhh,data=etzah2.clust)
summary(modwealthcattza2)

set.seed(02139)
sims<-1000
statwealthcat.tza<-c()
size<-length(unique(etzah2.clust$TRIBE))
list.id<-tapply(1:nrow(etzah2.clust),etzah2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])  
  s<-etzah2.clust[new.row,]
  mod<-lm(wealthcat~centr_tribe,weights=numhh,data=s)
  statwealthcat.tza[i]<-mod$coeff[2]
}

estwealthcat.tza<-mean(statwealthcat.tza)
sewealthcat.tza<-sd(statwealthcat.tza)
twealthcat.tza<-estwealthcat.tza/sewealthcat.tza

pvalwealthcat.tza<-2*(1-pt(abs(twealthcat.tza),df=size-1))

reswealthcat.tza<-c(estwealthcat.tza,sewealthcat.tza,pvalwealthcat.tza)

### controlling for population density

set.seed(02139)
sims<-1000
statwealthcat.tza2<-c()
statwealthcatpop.tza<-c()
size<-length(unique(etzah2.clust$TRIBE))
list.id<-tapply(1:nrow(etzah2.clust),etzah2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-etzah2.clust[new.row,]
  mod<-lm(wealthcat~centr_tribe+lnpd00,weights=numhh,data=s)
  statwealthcat.tza2[i]<-mod$coeff[2]
  statwealthcatpop.tza[i]<-mod$coeff[3]
}

estwealthcat.tza2<-mean(statwealthcat.tza2)
sewealthcat.tza2<-sd(statwealthcat.tza2)
twealthcat.tza2<-estwealthcat.tza2/sewealthcat.tza2

pvalwealthcat.tza2<-2*(1-pt(abs(twealthcat.tza2),df=size-1))

reswealthcat.tza2<-c(estwealthcat.tza2,sewealthcat.tza2,pvalwealthcat.tza2)
reswealthcat.tza2


estwealthcatpop.tza<-mean(statwealthcatpop.tza)
sewealthcatpop.tza<-sd(statwealthcatpop.tza)
twealthcatpop.tza<-estwealthcatpop.tza/sewealthcatpop.tza

pvalwealthcatpop.tza<-2*(1-pt(abs(twealthcatpop.tza),df=size-1))

reswealthcatpop.tza<-c(estwealthcatpop.tza,sewealthcatpop.tza,pvalwealthcatpop.tza)
reswealthcatpop.tza

### ANALYSIS (INDIVIDUAL) ###

### Education

modedutza<-lm(edu~centr_tribe,weights=numi,data=etzai2.clust)
summary(modedutza) ## sig positive effect

modedutza2<-lm(edu~centr_tribe+lnpd00,weights=numi,data=etzai2.clust)
summary(modedutza2)

### Bootstrapping

set.seed(02139)
sims<-1000
statedu.tza<-c()
size<-length(unique(etzai2.clust$TRIBE))
list.id<-tapply(1:nrow(etzai2.clust),etzai2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-etzai2.clust[new.row,]
  mod<-lm(edu~centr_tribe,weights=numi,data=s)
  statedu.tza[i]<-mod$coeff[2]
}

estedu.tza<-mean(statedu.tza)
seedu.tza<-sd(statedu.tza)
tedu.tza<-estedu.tza/seedu.tza

pvaledu.tza<-2*(1-pt(abs(tedu.tza),df=size-1))

resedu.tza<-c(estedu.tza,seedu.tza,pvaledu.tza)

### Bootstrapping (controlling for population density)

set.seed(02139)
sims<-1000
statedu.tza2<-c()
statedupop.tza2<-c()
size<-length(unique(etzai2.clust$TRIBE))
list.id<-tapply(1:nrow(etzai2.clust),etzai2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-etzai2.clust[new.row,]
  mod<-lm(edu~centr_tribe+lnpd00,weights=numi,data=s)
  statedu.tza2[i]<-mod$coeff[2]
  statedupop.tza2[i]<-mod$coeff[3]
}

estedu.tza2<-mean(statedu.tza2)
seedu.tza2<-sd(statedu.tza2)
tedu.tza2<-estedu.tza2/seedu.tza2

pvaledu.tza2<-2*(1-pt(abs(tedu.tza2),df=size-1))

resedu.tza2<-c(estedu.tza2,seedu.tza2,pvaledu.tza2)

estedupop.tza2<-mean(statedupop.tza2)
seedupop.tza2<-sd(statedupop.tza2)
tedupop.tza2<-estedupop.tza2/seedupop.tza2

pvaledupop.tza2<-2*(1-pt(abs(tedupop.tza2),df=size-1))

resedupop.tza2<-c(estedupop.tza2,seedupop.tza2,pvaledupop.tza2)

### Literacy

modlittza<-lm(lit~centr_tribe,weights=numi,data=etzai2.clust)
summary(modlittza)

## controlling for population density

modlittza2<-lm(lit~centr_tribe+lnpd00,weights=numi,data=etzai2.clust)
summary(modlittza2)

### Bootstrapping

set.seed(02139)
sims<-1000
statlit.tza<-c()
size<-length(unique(etzai2.clust$TRIBE))
list.id<-tapply(1:nrow(etzai2.clust),etzai2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-etzai2.clust[new.row,]
  mod<-lm(lit~centr_tribe,weights=numi,data=s)
  statlit.tza[i]<-mod$coeff[2]
}

estlit.tza<-mean(statlit.tza)
selit.tza<-sd(statlit.tza)
tlit.tza<-estlit.tza/selit.tza

pvallit.tza<-2*(1-pt(abs(tlit.tza),df=size-1))

reslit.tza<-c(estlit.tza,selit.tza,pvallit.tza)
reslit.tza 

### Bootstrapping (controlling for population density)

set.seed(02139)
sims<-1000
statlit.tza2<-c()
statlitpop.tza2<-c()
size<-length(unique(etzai2.clust$TRIBE))
list.id<-tapply(1:nrow(etzai2.clust),etzai2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-etzai2.clust[new.row,]
  mod<-lm(lit~centr_tribe+lnpd00,weights=numi,data=s)
  statlit.tza2[i]<-mod$coeff[2]
  statlitpop.tza2[i]<-mod$coeff[3]
}

estlit.tza2<-mean(statlit.tza2)
selit.tza2<-sd(statlit.tza2)
tlit.tza2<-estlit.tza2/selit.tza2

pvallit.tza2<-2*(1-pt(abs(tlit.tza2),df=size-1))

reslit.tza2<-c(estlit.tza2,selit.tza2,pvallit.tza2)


estlitpop.tza2<-mean(na.omit(statlitpop.tza2))
selitpop.tza2<-sd(statlitpop.tza2)
tlitpop.tza2<-estlitpop.tza2/selitpop.tza2

pvallitpop.tza2<-2*(1-pt(abs(tlitpop.tza2),df=size-1))

reslitpop.tza2<-c(estlitpop.tza2,selitpop.tza2,pvallitpop.tza2)

### Child mortality

modmorttza<-lm(mort~centr_tribe,weights=numi,data=etzai2.clust)
summary(modmorttza)

modmorttza2<-lm(mort~centr_tribe+lnpd00,weights=numi,data=etzai2.clust)
summary(modmorttza2) 

### Bootstrapping

set.seed(02139)
sims<-1000
statmort.tza<-c()
size<-length(unique(etzai2.clust$TRIBE))
list.id<-tapply(1:nrow(etzai2.clust),etzai2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-etzai2.clust[new.row,]
  mod<-lm(mort~centr_tribe,weights=numi,data=s)
  statmort.tza[i]<-mod$coeff[2]
}

estmort.tza<-mean(na.omit(statmort.tza))
semort.tza<-sd(na.omit(statmort.tza))
tmort.tza<-estmort.tza/semort.tza

pvalmort.tza<-2*(1-pt(abs(tmort.tza),df=size-1))

resmort.tza<-c(estmort.tza,semort.tza,pvalmort.tza)
resmort.tza 

### Bootstrap controlling for population density

set.seed(02139)
sims<-1000
statmort.tza2<-c()
statmortpop.tza2<-c()
size<-length(unique(etzai2.clust$TRIBE))
list.id<-tapply(1:nrow(etzai2.clust),etzai2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-etzai2.clust[new.row,]
  mod<-lm(mort~centr_tribe+lnpd00,weights=numi,data=s)
  statmort.tza2[i]<-mod$coeff[2]
  statmortpop.tza2[i]<-mod$coeff[3]
}

estmort.tza2<-mean(statmort.tza2)
semort.tza2<-sd(statmort.tza2)
tmort.tza2<-estmort.tza2/semort.tza2

pvalmort.tza2<-2*(1-pt(abs(tmort.tza2),df=size-1))

resmort.tza2<-c(estmort.tza2,semort.tza2,pvalmort.tza2)
resmort.tza2 ## still significant


estmortpop.tza2<-mean(na.omit(statmortpop.tza2))
semortpop.tza2<-sd(statmortpop.tza2)
tmortpop.tza2<-estmortpop.tza2/semortpop.tza2

pvalmortpop.tza2<-2*(1-pt(abs(tmortpop.tza2),df=size-1))

resmortpop.tza2<-c(estmortpop.tza2,semortpop.tza2,pvalmortpop.tza2)

### Making the table

stargazer(modwealthcattza,modwealthcattza2,modpipetza,modpipetza2,modelectza,modelectza2,modedutza,modedutza2,modlittza,modlittza2,modmorttza,modmorttza2,keep=c(1,2))

### Bootstrap standard errors

reswealthcat.tza[2]
reswealthcat.tza2[2]
reswealthcatpop.tza[2]

respipe.tza[2]
respipe.tza2[2]
respipepop.tza[2]

reselec.tza[2]
reselec.tza2[2]
reselecpop.tza[2]

resedu.tza[2]
resedu.tza2[2]
resedupop.tza2[2]

reslit.tza[2]
reslit.tza2[2]
reslitpop.tza2[2]

resmort.tza[2]
resmort.tza2[2]
resmortpop.tza2[2]


########################################################## TABLE 12 #########################################################
############################################## ZAMBIA ###########################################

load("ezmbh2.clust5km")
load("ezmbi2.clust5km")

#### ANALYSIS (HOUSEHOLD) #####

### Piped water

modpipezmb<-lm(piped~centr_tribe,weights=numhh,data=ezmbh2.clust)
summary(modpipezmb)

modpipezmb2<-lm(piped~centr_tribe+lnpd00,weights=numhh,data=ezmbh2.clust)
summary(modpipezmb2)

### Bootstrapping

set.seed(02139)
sims<-1000
statpipe.zmb<-c()
size<-length(unique(ezmbh2.clust$TRIBE))
list.id<-tapply(1:nrow(ezmbh2.clust),ezmbh2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-ezmbh2.clust[new.row,]
  mod<-lm(piped~centr_tribe,weights=numhh,data=s)
  statpipe.zmb[i]<-mod$coeff[2]
}

estpipe.zmb<-mean(na.omit(statpipe.zmb))
sepipe.zmb<-sd(na.omit(statpipe.zmb))
tpipe.zmb<-estpipe.zmb/sepipe.zmb

pvalpipe.zmb<-2*(1-pt(abs(tpipe.zmb),df=size-1))

respipe.zmb<-c(estpipe.zmb,sepipe.zmb,pvalpipe.zmb)

### controlling for population density

set.seed(02139)
sims<-1000
statpipe.zmb2<-c()
statpipepop.zmb2<-c()
size<-length(unique(ezmbh2.clust$TRIBE))
list.id<-tapply(1:nrow(ezmbi2.clust),ezmbh2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-ezmbh2.clust[new.row,]
  mod<-lm(piped~centr_tribe+lnpd00,weights=numhh,data=s)
  statpipe.zmb2[i]<-mod$coeff[2]
  statpipepop.zmb2[i]<-mod$coeff[3]
}

estpipe.zmb2<-mean(statpipe.zmb2)
sepipe.zmb2<-sd(statpipe.zmb2)
tpipe.zmb2<-estpipe.zmb2/sepipe.zmb2

pvalpipe.zmb2<-2*(1-pt(abs(tpipe.zmb2),df=size-1))

respipe.zmb2<-c(estpipe.zmb2,sepipe.zmb2,pvalpipe.zmb2)


estpipepop.zmb2<-mean(na.omit(statpipepop.zmb2))
sepipepop.zmb2<-sd(statpipepop.zmb2)
tpipepop.zmb2<-estpipepop.zmb2/sepipepop.zmb2

pvalpipepop.zmb2<-2*(1-pt(abs(tpipepop.zmb2),df=size-1))

respipepop.zmb2<-c(estpipepop.zmb2,sepipepop.zmb2,pvalpipepop.zmb2)


### Electricity

modeleczmb<-lm(elec~centr_tribe,weights=numhh,data=ezmbh2.clust)
summary(modeleczmb) 

modeleczmb2<-lm(elec~centr_tribe+lnpd00,weights=numhh,data=ezmbh2.clust)
summary(modeleczmb2)

### Bootstrapping 

set.seed(02139)
sims<-1000
statelec.zmb<-c()
size<-length(unique(ezmbh2.clust$TRIBE))
list.id<-tapply(1:nrow(ezmbh2.clust),ezmbh2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-ezmbh2.clust[new.row,]
  mod<-lm(elec~centr_tribe,weights=numhh,data=s)
  statelec.zmb[i]<-mod$coeff[2]
}

estelec.zmb<-mean(statelec.zmb)
seelec.zmb<-sd(statelec.zmb)
telec.zmb<-estelec.zmb/seelec.zmb

pvalelec.zmb<-2*(1-pt(abs(telec.zmb),df=size-1))

reselec.zmb<-c(estelec.zmb,seelec.zmb,pvalelec.zmb)

### controlling for population density

set.seed(02139)
sims<-1000
statelec.zmb2<-c()
statelecpop.zmb2<-c()
size<-length(unique(ezmbh2.clust$TRIBE))
list.id<-tapply(1:nrow(ezmbi2.clust),ezmbh2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-ezmbh2.clust[new.row,]
  mod<-lm(elec~centr_tribe+lnpd00,weights=numhh,data=s)
  statelec.zmb2[i]<-mod$coeff[2]
  statelecpop.zmb2[i]<-mod$coeff[3]
}

estelec.zmb2<-mean(statelec.zmb2)
seelec.zmb2<-sd(statelec.zmb2)
telec.zmb2<-estelec.zmb2/seelec.zmb2

pvalelec.zmb2<-2*(1-pt(abs(telec.zmb2),df=size-1))

reselec.zmb2<-c(estelec.zmb2,seelec.zmb2,pvalelec.zmb2)


estelecpop.zmb2<-mean(na.omit(statelecpop.zmb2))
seelecpop.zmb2<-sd(statelecpop.zmb2)
telecpop.zmb2<-estelecpop.zmb2/seelecpop.zmb2

pvalelecpop.zmb2<-2*(1-pt(abs(telecpop.zmb2),df=size-1))

reselecpop.zmb2<-c(estelecpop.zmb2,seelecpop.zmb2,pvalelecpop.zmb2)

### Wealth

modwealthcatzmb<-lm(wealthcat~centr_tribe,weights=numhh,data=ezmbh2.clust)
summary(modwealthcatzmb)

modwealthcatzmb2<-lm(wealthcat~centr_tribe+lnpd00,weights=numhh,data=ezmbh2.clust)
summary(modwealthcatzmb2)

### Bootstrapping 

set.seed(02139)
sims<-1000
statwealthcat.zmb<-c()
size<-length(unique(ezmbh2.clust$TRIBE))
list.id<-tapply(1:nrow(ezmbh2.clust),ezmbh2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-ezmbh2.clust[new.row,]
  mod<-lm(wealthcat~centr_tribe,weights=numhh,data=s)
  statwealthcat.zmb[i]<-mod$coeff[2]
}

estwealthcat.zmb<-mean(statwealthcat.zmb)
sewealthcat.zmb<-sd(statwealthcat.zmb)
twealthcat.zmb<-estwealthcat.zmb/sewealthcat.zmb

pvalwealthcat.zmb<-2*(1-pt(abs(twealthcat.zmb),df=size-1))

reswealthcat.zmb<-c(estwealthcat.zmb,sewealthcat.zmb,pvalwealthcat.zmb)

### controlling for population density

set.seed(02139)
sims<-1000
statwealthcat.zmb2<-c()
statwealthcatpop.zmb2<-c()
size<-length(unique(ezmbh2.clust$TRIBE))
list.id<-tapply(1:nrow(ezmbi2.clust),ezmbh2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-ezmbh2.clust[new.row,]
  mod<-lm(wealthcat~centr_tribe+lnpd00,weights=numhh,data=s)
  statwealthcat.zmb2[i]<-mod$coeff[2]
  statwealthcatpop.zmb2[i]<-mod$coeff[3]
}

estwealthcat.zmb2<-mean(statwealthcat.zmb2)
sewealthcat.zmb2<-sd(statwealthcat.zmb2)
twealthcat.zmb2<-estwealthcat.zmb2/sewealthcat.zmb2

pvalwealthcat.zmb2<-2*(1-pt(abs(twealthcat.zmb2),df=size-1))

reswealthcat.zmb2<-c(estwealthcat.zmb2,sewealthcat.zmb2,pvalwealthcat.zmb2)

estwealthcatpop.zmb2<-mean(na.omit(statwealthcatpop.zmb2))
sewealthcatpop.zmb2<-sd(statwealthcatpop.zmb2)
twealthcatpop.zmb2<-estwealthcatpop.zmb2/sewealthcatpop.zmb2

pvalwealthcatpop.zmb2<-2*(1-pt(abs(twealthcatpop.zmb2),df=size-1))

reswealthcatpop.zmb2<-c(estwealthcatpop.zmb2,sewealthcatpop.zmb2,pvalwealthcatpop.zmb2)


### ANALYSIS (INDIVIDUAL) ###

### Education

modeduzmb<-lm(edu~centr_tribe,weights=numi,data=ezmbi2.clust)
summary(modeduzmb)

modeduzmb2<-lm(edu~centr_tribe+lnpd00,weights=numi,data=ezmbi2.clust)
summary(modeduzmb2)


### Bootstrapping

set.seed(02139)
sims<-1000
statedu.zmb<-c()
size<-length(unique(ezmbi2.clust$TRIBE))
list.id<-tapply(1:nrow(ezmbi2.clust),ezmbi2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-ezmbi2.clust[new.row,]
  mod<-lm(edu~centr_tribe,weights=numi,data=s)
  statedu.zmb[i]<-mod$coeff[2]
}

estedu.zmb<-mean(statedu.zmb)
seedu.zmb<-sd(statedu.zmb)
tedu.zmb<-estedu.zmb/seedu.zmb

pvaledu.zmb<-2*(1-pt(abs(tedu.zmb),df=size-1))

resedu.zmb<-c(estedu.zmb,seedu.zmb,pvaledu.zmb)

### controlling for population density

set.seed(02139)
sims<-1000
statedu.zmb2<-c()
statedupop.zmb2<-c()
size<-length(unique(ezmbi2.clust$TRIBE))
list.id<-tapply(1:nrow(ezmbi2.clust),ezmbi2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-ezmbi2.clust[new.row,]
  mod<-lm(edu~centr_tribe+lnpd00,weights=numi,data=s)
  statedu.zmb2[i]<-mod$coeff[2]
  statedupop.zmb2[i]<-mod$coeff[3]
}

estedu.zmb2<-mean(statedu.zmb2)
seedu.zmb2<-sd(statedu.zmb2)
tedu.zmb2<-estedu.zmb2/seedu.zmb2

pvaledu.zmb2<-2*(1-pt(abs(tedu.zmb2),df=size-1))

resedu.zmb2<-c(estedu.zmb2,seedu.zmb2,pvaledu.zmb2)

estedupop.zmb2<-mean(na.omit(statedupop.zmb2))
seedupop.zmb2<-sd(statedupop.zmb2)
tedupop.zmb2<-estedupop.zmb2/seedupop.zmb2

pvaledupop.zmb2<-2*(1-pt(abs(tedupop.zmb2),df=size-1))

resedupop.zmb2<-c(estedupop.zmb2,seedupop.zmb2,pvaledupop.zmb2)

### LiteracY

modlitzmb<-lm(lit~centr_tribe,weights=numi,data=ezmbi2.clust)
summary(modlitzmb) 

modlitzmb2<-lm(lit~centr_tribe+lnpd00,weights=numi,data=ezmbi2.clust)
summary(modlitzmb2) 

### Bootstrapping

set.seed(02139)
sims<-1000
statlit.zmb<-c()
size<-length(unique(ezmbi2.clust$TRIBE))
list.id<-tapply(1:nrow(ezmbi2.clust),ezmbi2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-ezmbi2.clust[new.row,]
  mod<-lm(lit~centr_tribe,weights=numi,data=s)
  statlit.zmb[i]<-mod$coeff[2]
}

estlit.zmb<-mean(statlit.zmb)
selit.zmb<-sd(statlit.zmb)
tlit.zmb<-estlit.zmb/selit.zmb

pvallit.zmb<-2*(1-pt(abs(tlit.zmb),df=size-1))

reslit.zmb<-c(estlit.zmb,selit.zmb,pvallit.zmb)

### Controlling for population density

set.seed(02139)
sims<-1000
statlit.zmb2<-c()
statlitpop.zmb2<-c()
size<-length(unique(ezmbi2.clust$TRIBE))
list.id<-tapply(1:nrow(ezmbi2.clust),ezmbi2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-ezmbi2.clust[new.row,]
  mod<-lm(lit~centr_tribe+lnpd00,weights=numi,data=s)
  statlit.zmb2[i]<-mod$coeff[2]
  statlitpop.zmb2[i]<-mod$coeff[3]
}

estlit.zmb2<-mean(statlit.zmb2)
selit.zmb2<-sd(statlit.zmb2)
tlit.zmb2<-estlit.zmb2/selit.zmb2

pvallit.zmb2<-2*(1-pt(abs(tlit.zmb2),df=size-1))

reslit.zmb2<-c(estlit.zmb2,selit.zmb2,pvallit.zmb2)

estlitpop.zmb2<-mean(na.omit(statlitpop.zmb2))
selitpop.zmb2<-sd(statlitpop.zmb2)
tlitpop.zmb2<-estlitpop.zmb2/selitpop.zmb2

pvallitpop.zmb2<-2*(1-pt(abs(tlitpop.zmb2),df=size-1))

reslitpop.zmb2<-c(estlitpop.zmb2,selitpop.zmb2,pvallitpop.zmb2)

### Child mortality

modmortzmb<-lm(mort~centr_tribe,weights=numi,data=ezmbi2.clust)
summary(modmortzmb) 

modmortzmb2<-lm(mort~centr_tribe+lnpd00,weights=numi,data=ezmbi2.clust)
summary(modmortzmb2)

### Bootstrapping

set.seed(02139)
sims<-1000
statmort.zmb<-c()
size<-length(unique(ezmbi2.clust$TRIBE))
list.id<-tapply(1:nrow(ezmbi2.clust),ezmbi2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-ezmbi2.clust[new.row,]
  mod<-lm(mort~centr_tribe,weights=numi,data=s)
  statmort.zmb[i]<-mod$coeff[2]
}

estmort.zmb<-mean(na.omit(statmort.zmb))
semort.zmb<-sd(na.omit(statmort.zmb))
tmort.zmb<-estmort.zmb/semort.zmb

pvalmort.zmb<-2*(1-pt(abs(tmort.zmb),df=size-1))

resmort.zmb<-c(estmort.zmb,semort.zmb,pvalmort.zmb)

### controlling for pop dens

set.seed(02139)
sims<-1000
statmort.zmb2<-c()
statmortpop.zmb2<-c()
size<-length(unique(ezmbi2.clust$TRIBE))
list.id<-tapply(1:nrow(ezmbi2.clust),ezmbi2.clust$TRIBE,function(x){x})
for(i in 1:sims){
  bl.b<-sample(1:size,size,replace=T)
  new.row<-unlist(list.id[bl.b])
  s<-ezmbi2.clust[new.row,]
  mod<-lm(mort~centr_tribe+lnpd00,weights=numi,data=s)
  statmort.zmb2[i]<-mod$coeff[2]
  statmortpop.zmb2[i]<-mod$coeff[3]
}

estmort.zmb2<-mean(statmort.zmb2)
semort.zmb2<-sd(statmort.zmb2)
tmort.zmb2<-estmort.zmb2/semort.zmb2

pvalmort.zmb2<-2*(1-pt(abs(tmort.zmb2),df=size-1))

resmort.zmb2<-c(estmort.zmb2,semort.zmb2,pvalmort.zmb2)

estmortpop.zmb2<-mean(na.omit(statmortpop.zmb2))
semortpop.zmb2<-sd(statmortpop.zmb2)
tmortpop.zmb2<-estmortpop.zmb2/semortpop.zmb2

pvalmortpop.zmb2<-2*(1-pt(abs(tmortpop.zmb2),df=size-1))

resmortpop.zmb2<-c(estmortpop.zmb2,semortpop.zmb2,pvalmortpop.zmb2)

### Making the table

stargazer(modwealthcatzmb,modwealthcatzmb2,modpipezmb,modpipezmb2,modeleczmb,modeleczmb2,modeduzmb,modeduzmb2,modlitzmb,modlitzmb2,modmortzmb,modmortzmb2,keep=c(1,2))

### Bootstrap standard errors

reswealthcat.zmb[2]
reswealthcat.zmb2[2]
reswealthcatpop.zmb2[2]

respipe.zmb[2]
respipe.zmb2[2]
respipepop.zmb2[2]

reselec.zmb[2]
reselec.zmb2[2]
reselecpop.zmb2[2]

resedu.zmb[2]
resedu.zmb2[2]
resedupop.zmb2[2]

reslit.zmb[2]
reslit.zmb2[2]
reslitpop.zmb2[2]

resmort.zmb[2]
resmort.zmb2[2]
resmortpop.zmb2[2]
